Add libgmp 6.2.0 to third_party Don't build it yet. That will come in the next review. Change-Id: Idf3266558165e5ab45f4a41c98cc8c838c8244d5

commit: dace2a60b36477ef4295dea0cd01ae43a65f986f [log] [tgz]
author: Austin Schuh <austin.linux@gmail.com> Tue Aug 18 10:56:48 2020 -0700
committer: Austin Schuh <austin.linux@gmail.com> Wed Aug 19 16:14:33 2020 -0700
tree: f6c709631ecad56062e08308ae06dd1425613c36
parent: 1b6c20f6a7928823e49afda98bf720d99c5689ba [diff]
diff --git a/third_party/gmp/mpn/arm/README b/third_party/gmp/mpn/arm/README
new file mode 100644
index 0000000..53c7214
--- /dev/null
+++ b/third_party/gmp/mpn/arm/README

@@ -0,0 +1,35 @@
+Copyright 2002, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions for ARM processors.  It has been
+optimised mainly for Cortex-A9 and Cortex-A15, but the code in the top-level
+directory should run on all ARM processors at architecture level v4 or later.

diff --git a/third_party/gmp/mpn/arm/aors_n.asm b/third_party/gmp/mpn/arm/aors_n.asm
new file mode 100644
index 0000000..fdad9f7
--- /dev/null
+++ b/third_party/gmp/mpn/arm/aors_n.asm

@@ -0,0 +1,112 @@
+dnl  ARM mpn_add_n and mpn_sub_n
+
+dnl  Contributed to the GNU project by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.5	slightly fluctuating
+C Cortex-A15	 2.25
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_add_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`CLRCY',	`cmn	r0, #0')
+  define(`SETCY',	`cmp	$1, #1')
+  define(`RETVAL',	`adc	r0, n, #0')
+  define(`func',	mpn_add_n)
+  define(`func_nc',	mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`CLRCY',	`cmp	r0, r0')
+  define(`SETCY',	`rsbs	$1, $1, #0')
+  define(`RETVAL',	`sbc	r0, r0, r0
+			and	r0, r0, #1')
+  define(`func',	mpn_sub_n)
+  define(`func_nc',	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+	ldr	r12, [sp, #0]
+	stmfd	sp!, { r8, r9, lr }
+	SETCY(	r12)
+	b	L(ent)
+EPILOGUE()
+PROLOGUE(func)
+	stmfd	sp!, { r8, r9, lr }
+	CLRCY(	r12)
+L(ent):	tst	n, #1
+	beq	L(skip1)
+	ldr	r12, [up], #4
+	ldr	lr, [vp], #4
+	ADDSUBC	r12, r12, lr
+	str	r12, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	ldmia	up!, { r8, r9 }
+	ldmia	vp!, { r12, lr }
+	ADDSUBC	r8, r8, r12
+	ADDSUBC	r9, r9, lr
+	stmia	rp!, { r8, r9 }
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+L(top):	ldmia	up!, { r4, r5, r6, r7 }
+	ldmia	vp!, { r8, r9, r12, lr }
+	ADDSUBC	r4, r4, r8
+	sub	n, n, #4
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r12
+	ADDSUBC	r7, r7, lr
+	stmia	rp!, { r4, r5, r6, r7 }
+	teq	n, #0
+	bne	L(top)
+
+	ldmfd	sp!, { r4, r5, r6, r7 }
+
+L(rtn):	RETVAL
+	ldmfd	sp!, { r8, r9, pc }
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/aorslsh1_n.asm b/third_party/gmp/mpn/arm/aorslsh1_n.asm
new file mode 100644
index 0000000..889e654
--- /dev/null
+++ b/third_party/gmp/mpn/arm/aorslsh1_n.asm

@@ -0,0 +1,167 @@
+dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	      addlsh1_n       sublsh1_n
+C	     cycles/limb     cycles/limb
+C StrongARM	 ?		 ?
+C XScale	 ?		 ?
+C Cortex-A7	 ?		 ?
+C Cortex-A8	 ?		 ?
+C Cortex-A9	 3.12		 3.7
+C Cortex-A15	 ?		 ?
+
+C TODO
+C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
+C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
+C    very much.  If two insns are really needed, it might help to separate them
+C    for better micro-parallelism.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_addlsh1_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`SETCY',	`cmp	$1, #1')
+  define(`RETVAL',	`adc	r0, $1, #2')
+  define(`SAVECY',	`sbc	$1, $2, #0')
+  define(`RESTCY',	`cmn	$1, #1')
+  define(`REVCY',	`')
+  define(`INICYR',	`mov	$1, #0')
+  define(`r10r11',	`r11')
+  define(`func',	mpn_addlsh1_n)
+  define(`func_nc',	mpn_addlsh1_nc)')
+ifdef(`OPERATION_sublsh1_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`SETCY',	`rsbs	$1, $1, #0')
+  define(`RETVAL',	`adc	r0, $1, #1')
+  define(`SAVECY',	`sbc	$1, $1, $1')
+  define(`RESTCY',	`cmn	$1, #1')
+  define(`REVCY',	`sbc	$1, $1, $1
+			cmn	$1, #1')
+  define(`INICYR',	`mvn	$1, #0')
+  define(`r10r11',	`r10')
+  define(`func',	mpn_sublsh1_n)
+  define(`func_nc',	mpn_sublsh1_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{r4-r10r11, r14}
+
+ifdef(`OPERATION_addlsh1_n', `
+	mvn	r11, #0
+')
+	INICYR(	r14)
+	subs	n, n, #3
+	blt	L(le2)			C carry clear on branch path
+
+	cmn	r0, #0			C clear carry
+	ldmia	vp!, {r8, r9, r10}
+	b	L(mid)
+
+L(top):	RESTCY(	r14)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r10
+	ldmia	vp!, {r8, r9, r10}
+	stmia	rp!, {r4, r5, r6}
+	REVCY(r14)
+	adcs	r8, r8, r8
+	adcs	r9, r9, r9
+	adcs	r10, r10, r10
+	ldmia	up!, {r4, r5, r6}
+	SAVECY(	r14, r11)
+	subs	n, n, #3
+	blt	L(exi)
+	RESTCY(	r12)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r10
+	ldmia	vp!, {r8, r9, r10}
+	stmia	rp!, {r4, r5, r6}
+	REVCY(r12)
+L(mid):	adcs	r8, r8, r8
+	adcs	r9, r9, r9
+	adcs	r10, r10, r10
+	ldmia	up!, {r4, r5, r6}
+	SAVECY(	r12, r11)
+	subs	n, n, #3
+	bge	L(top)
+
+	mov	r7, r12			C swap alternating...
+	mov	r12, r14		C ...carry-save...
+	mov	r14, r7			C ...registers
+
+L(exi):	RESTCY(	r12)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r10
+	stmia	rp!, {r4, r5, r6}
+
+	REVCY(r12)
+L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
+	beq	L(e1)
+
+L(e02):	tst	n, #2
+	beq	L(rt0)
+	ldm	vp, {r8, r9}
+	adcs	r8, r8, r8
+	adcs	r9, r9, r9
+	ldm	up, {r4, r5}
+	SAVECY(	r12, r11)
+	RESTCY(	r14)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	stm	rp, {r4, r5}
+	b	L(rt1)
+
+L(e1):	ldr	r8, [vp]
+	adcs	r8, r8, r8
+	ldr	r4, [up]
+	SAVECY(	r12, r11)
+	RESTCY(	r14)
+	ADDSUBC	r4, r4, r8
+	str	r4, [rp]
+
+L(rt1):	mov	r14, r12
+	REVCY(r12)
+L(rt0):	RETVAL(	r14)
+	pop	{r4-r10r11, r14}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/aorsmul_1.asm b/third_party/gmp/mpn/arm/aorsmul_1.asm
new file mode 100644
index 0000000..b02fbb3
--- /dev/null
+++ b/third_party/gmp/mpn/arm/aorsmul_1.asm

@@ -0,0 +1,135 @@
+dnl  ARM mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:     ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 5.25
+C Cortex-A15	 4
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`vl', `r3')
+define(`rl', `r12')
+define(`ul', `r6')
+define(`r',  `lr')
+
+ifdef(`OPERATION_addmul_1', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`CLRRCY',	`mov	$1, #0
+			adds	r0, r0, #0')
+  define(`RETVAL',	`adc	r0, r4, #0')
+  define(`func',	mpn_addmul_1)')
+ifdef(`OPERATION_submul_1', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`CLRRCY',	`subs	$1, r0, r0')
+  define(`RETVAL',	`sbc	r0, r0, r0
+			sub	r0, $1, r0')
+  define(`func',	mpn_submul_1)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+PROLOGUE(func)
+	stmfd	sp!, { r4-r6, lr }
+	CLRRCY(	r4)
+	tst	n, #1
+	beq	L(skip1)
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	umull	r5, r4, ul, vl
+	ADDSUB	r, rl, r5
+	str	r, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	ADDSUBC	r, rl, r5
+	str	r, [rp], #4
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	b	L(in)
+
+L(top):	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r5
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+L(in):	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r5
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	sub	n, n, #4
+	tst	n, n
+	str	r, [rp], #4
+	bne	L(top)
+
+	ADDSUBC	r, rl, r5
+	str	r, [rp]
+
+L(rtn):	RETVAL(	r4)
+	ldmfd	sp!, { r4-r6, pc }
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/arm-defs.m4 b/third_party/gmp/mpn/arm/arm-defs.m4
new file mode 100644
index 0000000..4b4fa0b
--- /dev/null
+++ b/third_party/gmp/mpn/arm/arm-defs.m4

@@ -0,0 +1,100 @@
+divert(-1)
+
+dnl  m4 macros for ARM assembler.
+
+dnl  Copyright 2001, 2012-2016, 2018-2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  Standard commenting is with @, the default m4 # is for constants and we
+dnl  don't want to disable macro expansions in or after them.
+
+changecom(@&*$)
+
+define(`ASM_START',
+m4_assert_numargs_range(0,1)
+`ifelse($1,`neon',`.fpu	neon',
+        $1,,`',
+        1,1,`m4_error(`$0 got invalid argument $1')')')
+
+dnl  APCS register names.
+
+deflit(a1,r0)
+deflit(a2,r1)
+deflit(a3,r2)
+deflit(a4,r3)
+dnl deflit(v1,r4)
+dnl deflit(v2,r5)
+dnl deflit(v3,r6)
+dnl deflit(v4,r7)
+dnl deflit(v5,r8)
+dnl deflit(v6,r9)
+deflit(sb,r9)
+dnl deflit(v7,r10)
+deflit(sl,r10)
+deflit(fp,r11)
+deflit(ip,r12)
+dnl deflit(sp,r13)
+deflit(lr,r14)
+deflit(pc,r15)
+
+
+define(`lea_list', `')
+define(`lea_num',0)
+
+dnl  LEA(reg,gmp_symbol)
+dnl
+dnl  Load the address of gmp_symbol into a register.  The gmp_symbol must be
+dnl  either local or protected/hidden, since we assume it has a fixed distance
+dnl  from the point of use.
+
+define(`LEA',`dnl
+ldr	$1, L(ptr`'lea_num)
+ifdef(`PIC',dnl
+`dnl
+L(bas`'lea_num):dnl
+	add	$1, $1, pc`'dnl
+	m4append(`lea_list',`
+L(ptr'lea_num`):	.word	GSYM_PREFIX`'$2-L(bas'lea_num`)-8')
+	define(`lea_num', eval(lea_num+1))dnl
+',`dnl
+	m4append(`lea_list',`
+L(ptr'lea_num`):	.word	GSYM_PREFIX`'$2')
+	define(`lea_num', eval(lea_num+1))dnl
+')dnl
+')
+
+define(`return',`ifdef(`NOTHUMB',`mov	pc, ',`bx')')
+
+
+define(`EPILOGUE_cpu',
+`lea_list
+	SIZE(`$1',.-`$1')'
+`define(`lea_list', `')')
+
+divert

diff --git a/third_party/gmp/mpn/arm/bdiv_dbm1c.asm b/third_party/gmp/mpn/arm/bdiv_dbm1c.asm
new file mode 100644
index 0000000..b919dc4
--- /dev/null
+++ b/third_party/gmp/mpn/arm/bdiv_dbm1c.asm

@@ -0,0 +1,113 @@
+dnl  ARM mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.25
+C Cortex-A15	 2.5
+
+C TODO
+C  * Try using umlal or umaal.
+C  * Try using ldm/stm.
+
+define(`qp',	  `r0')
+define(`up',	  `r1')
+define(`n',	  `r2')
+define(`bd',	  `r3')
+define(`cy',	  `sp,#0')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+	push	{r4, r5, r6, r7, r8}
+	ldr	r4, [up], #4
+	ldr	r5, [sp, #20]
+	ands	r12, n, #3
+	beq	L(fi0)
+	cmp	r12, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	umull	r8, r12, r4, bd
+	ldr	r4, [up], #4
+	b	L(lo3)
+
+L(fi0):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	b	L(lo0)
+
+L(fi1):	subs	n, n, #1
+	umull	r8, r12, r4, bd
+	bls	L(wd1)
+	ldr	r4, [up], #4
+	b	L(lo1)
+
+L(fi2):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	b	L(lo2)
+
+L(top):	ldr	r4, [up], #4
+	subs	r5, r5, r6
+	str	r5, [qp], #4
+	sbc	r5, r5, r7
+L(lo1):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	subs	r5, r5, r8
+	str	r5, [qp], #4
+	sbc	r5, r5, r12
+L(lo0):	umull	r8, r12, r4, bd
+	ldr	r4, [up], #4
+	subs	r5, r5, r6
+	str	r5, [qp], #4
+	sbc	r5, r5, r7
+L(lo3):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	subs	r5, r5, r8
+	str	r5, [qp], #4
+	sbc	r5, r5, r12
+L(lo2):	subs	n, n, #4
+	umull	r8, r12, r4, bd
+	bhi	L(top)
+
+L(wd2):	subs	r5, r5, r6
+	str	r5, [qp], #4
+	sbc	r5, r5, r7
+L(wd1):	subs	r5, r5, r8
+	str	r5, [qp]
+	sbc	r0, r5, r12
+	pop	{r4, r5, r6, r7, r8}
+	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/bdiv_q_1.asm b/third_party/gmp/mpn/arm/bdiv_q_1.asm
new file mode 100644
index 0000000..ae395d1
--- /dev/null
+++ b/third_party/gmp/mpn/arm/bdiv_q_1.asm

@@ -0,0 +1,162 @@
+dnl  ARM v4 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C               norm   unorm
+C 1176		13	18
+C Cortex-A5	 8	12
+C Cortex-A7	10.5	18
+C Cortex-A8	14	15
+C Cortex-A9	10	12		not measured since latest edits
+C Cortex-A15	 9	 9
+C Cortex-A53	14	20
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	-
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`d',   `r3')
+define(`di_arg',  `sp[0]')		C	just mpn_pi1_bdiv_q_1
+define(`cnt_arg', `sp[4]')		C	just mpn_pi1_bdiv_q_1
+
+define(`cy',  `r7')
+define(`cnt', `r6')
+define(`tnc', `r8')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+	tst	d, #1
+	push	{r6-r11}
+	mov	cnt, #0
+	bne	L(inv)
+
+C count trailing zeros
+	movs	r10, d, lsl #16
+	moveq	d, d, lsr #16
+	moveq	cnt, #16
+	tst	d, #0xff
+	moveq	d, d, lsr #8
+	addeq	cnt, cnt, #8
+	LEA(	r10, ctz_tab)
+	and	r11, d, #0xff
+	ldrb	r10, [r10, r11]
+	mov	d, d, lsr r10
+	add	cnt, cnt, r10
+
+C binvert limb
+L(inv):	LEA(	r10, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r10, [r10, r12, lsr #1]
+	mul	r12, r10, r10
+	mul	r12, d, r12
+	rsb	r12, r12, r10, lsl #1
+	mul	r10, r12, r12
+	mul	r10, d, r10
+	rsb	r10, r10, r12, lsl #1	C r10 = inverse
+	b	L(pi1)
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	push	{r6-r11}
+
+	ldr	cnt, [sp, #28]
+	ldr	r10, [sp, #24]
+
+L(pi1):	ldr	r11, [up], #4		C up[0]
+	cmp	cnt, #0
+	mov	cy, #0
+	bne	L(unorm)
+
+L(norm):
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edn)
+
+	ALIGN(16)
+L(tpn):	sbcs	cy, r11, cy
+	ldr	r11, [up], #4
+	sub	n, n, #1
+	mul	r9, r10, cy
+	tst	n, n
+	umull	r12, cy, d, r9
+	str	r9, [rp], #4
+	bne	L(tpn)
+
+L(edn):	sbc	cy, r11, cy
+	mul	r9, r10, cy
+	str	r9, [rp]
+	pop	{r6-r11}
+	return	r14
+
+L(unorm):
+	rsb	tnc, cnt, #32
+	mov	r11, r11, lsr cnt
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r11, r12, lsl tnc
+	mov	r11, r12, lsr cnt
+	sbcs	cy, r9, cy		C critical path ->cy->cy->
+	sub	n, n, #1
+	mul	r9, r10, cy		C critical path ->cy->r9->
+	tst	n, n
+	umull	r12, cy, d, r9		C critical path ->r9->cy->
+	str	r9, [rp], #4
+	bne	L(tpu)
+
+L(edu):	sbc	cy, r11, cy
+	mul	r9, r10, cy
+	str	r9, [rp]
+	pop	{r6-r11}
+	return	r14
+EPILOGUE()
+
+	RODATA
+ctz_tab:
+	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0

diff --git a/third_party/gmp/mpn/arm/cnd_aors_n.asm b/third_party/gmp/mpn/arm/cnd_aors_n.asm
new file mode 100644
index 0000000..0479f0d
--- /dev/null
+++ b/third_party/gmp/mpn/arm/cnd_aors_n.asm

@@ -0,0 +1,134 @@
+dnl  ARM mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3
+C Cortex-A15	 2.5
+
+define(`cnd',	`r0')
+define(`rp',	`r1')
+define(`up',	`r2')
+define(`vp',	`r3')
+
+define(`n',	`r12')
+
+
+ifdef(`OPERATION_cnd_add_n', `
+	define(`ADDSUB',      adds)
+	define(`ADDSUBC',      adcs)
+	define(`INITCY',      `cmn	r0, #0')
+	define(`RETVAL',      `adc	r0, n, #0')
+	define(func,	      mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+	define(`ADDSUB',      subs)
+	define(`ADDSUBC',      sbcs)
+	define(`INITCY',      `cmp	r0, #0')
+	define(`RETVAL',      `adc	r0, n, #0
+			      rsb	r0, r0, #1')
+	define(func,	      mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{r4-r11}
+	ldr	n, [sp, #32]
+
+	cmp	cnd, #1
+	sbc	cnd, cnd, cnd		C conditionally set to 0xffffffff
+
+	INITCY				C really only needed for n = 0 (mod 4)
+
+	ands	r4, n, #3
+	beq	L(top)
+	cmp	r4, #2
+	bcc	L(b1)
+	beq	L(b2)
+
+L(b3):	ldm	vp!, {r4,r5,r6}
+	ldm	up!, {r8,r9,r10}
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	bic	r6, r6, cnd
+	ADDSUB	r8, r8, r4
+	ADDSUBC	r9, r9, r5
+	ADDSUBC	r10, r10, r6
+	stm	rp!, {r8,r9,r10}
+	sub	n, n, #3
+	teq	n, #0
+	bne	L(top)
+	b	L(end)
+
+L(b2):	ldm	vp!, {r4,r5}
+	ldm	up!, {r8,r9}
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	ADDSUB	r8, r8, r4
+	ADDSUBC	r9, r9, r5
+	stm	rp!, {r8,r9}
+	sub	n, n, #2
+	teq	n, #0
+	bne	L(top)
+	b	L(end)
+
+L(b1):	ldr	r4, [vp], #4
+	ldr	r8, [up], #4
+	bic	r4, r4, cnd
+	ADDSUB	r8, r8, r4
+	str	r8, [rp], #4
+	sub	n, n, #1
+	teq	n, #0
+	beq	L(end)
+
+L(top):	ldm	vp!, {r4,r5,r6,r7}
+	ldm	up!, {r8,r9,r10,r11}
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	ADDSUBC	r8, r8, r4
+	ADDSUBC	r9, r9, r5
+	ADDSUBC	r10, r10, r6
+	ADDSUBC	r11, r11, r7
+	sub	n, n, #4
+	stm	rp!, {r8,r9,r10,r11}
+	teq	n, #0
+	bne	L(top)
+
+L(end):	RETVAL
+	pop	{r4-r11}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/com.asm b/third_party/gmp/mpn/arm/com.asm
new file mode 100644
index 0000000..850b10a
--- /dev/null
+++ b/third_party/gmp/mpn/arm/com.asm

@@ -0,0 +1,75 @@
+dnl  ARM mpn_com.
+
+dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.0
+C Cortex-A15	 1.75
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+	tst	n, #1
+	beq	L(skip1)
+	ldr	r3, [up], #4
+	mvn	r3, r3
+	str	r3, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	ldmia	up!, { r3, r12 }		C load 2 limbs
+	mvn	r3, r3
+	mvn	r12, r12
+	stmia	rp!, { r3, r12 }		C store 2 limbs
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+	stmfd	sp!, { r7, r8, r9 }		C save regs on stack
+
+L(top):	ldmia	up!, { r3, r8, r9, r12 }	C load 4 limbs
+	subs	n, n, #4
+	mvn	r3, r3
+	mvn	r8, r8
+	mvn	r9, r9
+	mvn	r12, r12
+	stmia	rp!, { r3, r8, r9, r12 }	C store 4 limbs
+	bne	L(top)
+
+	ldmfd	sp!, { r7, r8, r9 }		C restore regs from stack
+L(rtn):	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/copyd.asm b/third_party/gmp/mpn/arm/copyd.asm
new file mode 100644
index 0000000..bcad98d
--- /dev/null
+++ b/third_party/gmp/mpn/arm/copyd.asm

@@ -0,0 +1,84 @@
+dnl  ARM mpn_copyd.
+
+dnl  Contributed to the GNU project by Robert Harley and Torbjörn Granlund.
+
+dnl  Copyright 2003, 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.25-1.5
+C Cortex-A15	 1.25
+
+C TODO
+C  * Consider wider unrolling.  Analogous 8-way code runs 10% faster on both A9
+C    and A15.  But it probably slows things down for 8 <= n < a few dozen.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+	mov	r12, n, lsl #2
+	sub	r12, r12, #4
+	add	rp, rp, r12
+	add	up, up, r12
+
+	tst	n, #1
+	beq	L(skip1)
+	ldr	r3, [up], #-4
+	str	r3, [rp], #-4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	ldmda	up!, { r3,r12 }
+	stmda	rp!, { r3,r12 }
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+
+	push	{ r4-r5 }
+	subs	n, n, #4
+	ldmda	up!, { r3,r4,r5,r12 }
+	beq	L(end)
+
+L(top):	subs	n, n, #4
+	stmda	rp!, { r3,r4,r5,r12 }
+	ldmda	up!, { r3,r4,r5,r12 }
+	bne	L(top)
+
+L(end):	stmda	rp, { r3,r4,r5,r12 }
+	pop	{ r4-r5 }
+L(rtn):	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/copyi.asm b/third_party/gmp/mpn/arm/copyi.asm
new file mode 100644
index 0000000..421930f
--- /dev/null
+++ b/third_party/gmp/mpn/arm/copyi.asm

@@ -0,0 +1,79 @@
+dnl  ARM mpn_copyi.
+
+dnl  Contributed to the GNU project by Robert Harley and Torbjörn Granlund.
+
+dnl  Copyright 2003, 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.25-1.5
+C Cortex-A15	 1.25
+
+C TODO
+C  * Consider wider unrolling.  Analogous 8-way code runs 10% faster on both A9
+C    and A15.  But it probably slows things down for 8 <= n < a few dozen.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+	tst	n, #1
+	beq	L(skip1)
+	ldr	r3, [up], #4
+	str	r3, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	ldmia	up!, { r3,r12 }
+	stmia	rp!, { r3,r12 }
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+
+	push	{ r4-r5 }
+	subs	n, n, #4
+	ldmia	up!, { r3,r4,r5,r12 }
+	beq	L(end)
+
+L(top):	subs	n, n, #4
+	stmia	rp!, { r3,r4,r5,r12 }
+	ldmia	up!, { r3,r4,r5,r12 }
+	bne	L(top)
+
+L(end):	stm	rp, { r3,r4,r5,r12 }
+	pop	{ r4-r5 }
+L(rtn):	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/dive_1.asm b/third_party/gmp/mpn/arm/dive_1.asm
new file mode 100644
index 0000000..8bffb0c
--- /dev/null
+++ b/third_party/gmp/mpn/arm/dive_1.asm

@@ -0,0 +1,151 @@
+dnl  ARM v4 mpn_divexact_1.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb       cycles/limb
+C               norm    unorm    modexact_1c_odd
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	10	12
+C Cortex-A15	 9	 9
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	-
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`d',  `r3')
+
+define(`cy', `r7')
+define(`cnt', `r6')
+define(`tnc', `r8')
+
+ASM_START()
+PROLOGUE(mpn_divexact_1)
+	tst	d, #1
+	push	{r4-r9}
+	mov	cnt, #0
+	bne	L(inv)
+
+C count trailing zeros
+	movs	r4, d, lsl #16
+	moveq	d, d, lsr #16
+	moveq	cnt, #16
+	tst	d, #0xff
+	moveq	d, d, lsr #8
+	addeq	cnt, cnt, #8
+	LEA(	r4, ctz_tab)
+	and	r5, d, #0xff
+	ldrb	r4, [r4, r5]
+	mov	d, d, lsr r4
+	add	cnt, cnt, r4
+
+C binvert limb
+L(inv):	LEA(	r4, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]
+	mul	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, lsl #1
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, lsl #1	C r4 = inverse
+
+	tst	cnt, cnt
+	ldr	r5, [up], #4		C up[0]
+	mov	cy, #0
+	bne	L(unnorm)
+
+L(norm):
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edn)
+
+	ALIGN(16)
+L(tpn):	sbcs	cy, r5, cy
+	ldr	r5, [up], #4
+	sub	n, n, #1
+	mul	r9, r4, cy
+	tst	n, n
+	umull	r12, cy, d, r9
+	str	r9, [rp], #4
+	bne	L(tpn)
+
+L(edn):	sbc	cy, r5, cy
+	mul	r9, r4, cy
+	str	r9, [rp]
+	pop	{r4-r9}
+	return	r14
+
+L(unnorm):
+	rsb	tnc, cnt, #32
+	mov	r5, r5, lsr cnt
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r5, r12, lsl tnc
+	mov	r5, r12, lsr cnt
+	sbcs	cy, r9, cy		C critical path ->cy->cy->
+	sub	n, n, #1
+	mul	r9, r4, cy		C critical path ->cy->r9->
+	tst	n, n
+	umull	r12, cy, d, r9		C critical path ->r9->cy->
+	str	r9, [rp], #4
+	bne	L(tpu)
+
+L(edu):	sbc	cy, r5, cy
+	mul	r9, r4, cy
+	str	r9, [rp]
+	pop	{r4-r9}
+	return	r14
+EPILOGUE()
+
+	RODATA
+ctz_tab:
+	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0

diff --git a/third_party/gmp/mpn/arm/gmp-mparam.h b/third_party/gmp/mpn/arm/gmp-mparam.h
new file mode 100644
index 0000000..87eec3a
--- /dev/null
+++ b/third_party/gmp/mpn/arm/gmp-mparam.h

@@ -0,0 +1,127 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1193MHz ARM (gcc55.fsffrance.org) */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         56
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     71
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                   0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           41
+
+#define MUL_TOOM22_THRESHOLD                36
+#define MUL_TOOM33_THRESHOLD               125
+#define MUL_TOOM44_THRESHOLD               193
+#define MUL_TOOM6H_THRESHOLD               303
+#define MUL_TOOM8H_THRESHOLD               418
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     125
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     176
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
+
+#define SQR_BASECASE_THRESHOLD              12
+#define SQR_TOOM2_THRESHOLD                 78
+#define SQR_TOOM3_THRESHOLD                137
+#define SQR_TOOM4_THRESHOLD                212
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                422
+
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               26
+
+#define MUL_FFT_MODF_THRESHOLD             436  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    436, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
+    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {    256, 9}, {    512,10}, {   1024,11}, {   2048,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 28
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             404  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    404, 5}, {     13, 4}, {     27, 5}, {     27, 6}, \
+    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
+    {     35, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {    512,10}, \
+    {   1024,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 26
+#define SQR_FFT_THRESHOLD                 3776
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 137
+#define MULLO_MUL_N_THRESHOLD            11479
+
+#define DC_DIV_QR_THRESHOLD                150
+#define DC_DIVAPPR_Q_THRESHOLD             494
+#define DC_BDIV_QR_THRESHOLD               148
+#define DC_BDIV_Q_THRESHOLD                345
+
+#define INV_MULMOD_BNM1_THRESHOLD           70
+#define INV_NEWTON_THRESHOLD               474
+#define INV_APPR_THRESHOLD                 478
+
+#define BINV_NEWTON_THRESHOLD              542
+#define REDC_1_TO_REDC_N_THRESHOLD         117
+
+#define MU_DIV_QR_THRESHOLD               2089
+#define MU_DIVAPPR_Q_THRESHOLD            2172
+#define MUPI_DIV_QR_THRESHOLD              225
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     197
+#define GCD_DC_THRESHOLD                   902
+#define GCDEXT_DC_THRESHOLD                650
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                20
+#define GET_STR_PRECOMPUTE_THRESHOLD        39
+#define SET_STR_DC_THRESHOLD              1045
+#define SET_STR_PRECOMPUTE_THRESHOLD      2147

diff --git a/third_party/gmp/mpn/arm/invert_limb.asm b/third_party/gmp/mpn/arm/invert_limb.asm
new file mode 100644
index 0000000..af7502d
--- /dev/null
+++ b/third_party/gmp/mpn/arm/invert_limb.asm

@@ -0,0 +1,93 @@
+dnl  ARM mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+	LEA(	r2, approx_tab-512)
+	mov	r3, r0, lsr #23
+	mov	r3, r3, asl #1
+	ldrh	r3, [r3, r2]
+	mov	r1, r3, asl #17
+	mul	r12, r3, r3
+	umull	r3, r2, r12, r0
+	sub	r1, r1, r2, asl #1
+	umull	r3, r2, r1, r1
+	umull	r12, r3, r0, r3
+	umull	r2, r12, r0, r2
+	adds	r2, r2, r3
+	adc	r12, r12, #0
+	rsb	r1, r12, r1
+	mvn	r2, r2, lsr #30
+	add	r2, r2, r1, asl #2
+	umull	r12, r3, r0, r2
+	adds	r1, r12, r0
+	adc	r3, r3, r0
+	rsb	r0, r3, r2
+	return	lr
+EPILOGUE()
+
+	RODATA
+	ALIGN(2)
+approx_tab:
+	.short    0xffc0,0xfec0,0xfdc0,0xfcc0,0xfbc0,0xfac0,0xfa00,0xf900
+	.short    0xf800,0xf700,0xf640,0xf540,0xf440,0xf380,0xf280,0xf180
+	.short    0xf0c0,0xefc0,0xef00,0xee00,0xed40,0xec40,0xeb80,0xeac0
+	.short    0xe9c0,0xe900,0xe840,0xe740,0xe680,0xe5c0,0xe500,0xe400
+	.short    0xe340,0xe280,0xe1c0,0xe100,0xe040,0xdf80,0xdec0,0xde00
+	.short    0xdd40,0xdc80,0xdbc0,0xdb00,0xda40,0xd980,0xd8c0,0xd800
+	.short    0xd740,0xd680,0xd600,0xd540,0xd480,0xd3c0,0xd340,0xd280
+	.short    0xd1c0,0xd140,0xd080,0xcfc0,0xcf40,0xce80,0xcdc0,0xcd40
+	.short    0xcc80,0xcc00,0xcb40,0xcac0,0xca00,0xc980,0xc8c0,0xc840
+	.short    0xc780,0xc700,0xc640,0xc5c0,0xc540,0xc480,0xc400,0xc380
+	.short    0xc2c0,0xc240,0xc1c0,0xc100,0xc080,0xc000,0xbf80,0xbec0
+	.short    0xbe40,0xbdc0,0xbd40,0xbc80,0xbc00,0xbb80,0xbb00,0xba80
+	.short    0xba00,0xb980,0xb900,0xb840,0xb7c0,0xb740,0xb6c0,0xb640
+	.short    0xb5c0,0xb540,0xb4c0,0xb440,0xb3c0,0xb340,0xb2c0,0xb240
+	.short    0xb1c0,0xb140,0xb0c0,0xb080,0xb000,0xaf80,0xaf00,0xae80
+	.short    0xae00,0xad80,0xad40,0xacc0,0xac40,0xabc0,0xab40,0xaac0
+	.short    0xaa80,0xaa00,0xa980,0xa900,0xa8c0,0xa840,0xa7c0,0xa740
+	.short    0xa700,0xa680,0xa600,0xa5c0,0xa540,0xa4c0,0xa480,0xa400
+	.short    0xa380,0xa340,0xa2c0,0xa240,0xa200,0xa180,0xa140,0xa0c0
+	.short    0xa080,0xa000,0x9f80,0x9f40,0x9ec0,0x9e80,0x9e00,0x9dc0
+	.short    0x9d40,0x9d00,0x9c80,0x9c40,0x9bc0,0x9b80,0x9b00,0x9ac0
+	.short    0x9a40,0x9a00,0x9980,0x9940,0x98c0,0x9880,0x9840,0x97c0
+	.short    0x9780,0x9700,0x96c0,0x9680,0x9600,0x95c0,0x9580,0x9500
+	.short    0x94c0,0x9440,0x9400,0x93c0,0x9340,0x9300,0x92c0,0x9240
+	.short    0x9200,0x91c0,0x9180,0x9100,0x90c0,0x9080,0x9000,0x8fc0
+	.short    0x8f80,0x8f40,0x8ec0,0x8e80,0x8e40,0x8e00,0x8d80,0x8d40
+	.short    0x8d00,0x8cc0,0x8c80,0x8c00,0x8bc0,0x8b80,0x8b40,0x8b00
+	.short    0x8a80,0x8a40,0x8a00,0x89c0,0x8980,0x8940,0x88c0,0x8880
+	.short    0x8840,0x8800,0x87c0,0x8780,0x8740,0x8700,0x8680,0x8640
+	.short    0x8600,0x85c0,0x8580,0x8540,0x8500,0x84c0,0x8480,0x8440
+	.short    0x8400,0x8380,0x8340,0x8300,0x82c0,0x8280,0x8240,0x8200
+	.short    0x81c0,0x8180,0x8140,0x8100,0x80c0,0x8080,0x8040,0x8000
+ASM_END()

diff --git a/third_party/gmp/mpn/arm/logops_n.asm b/third_party/gmp/mpn/arm/logops_n.asm
new file mode 100644
index 0000000..7e04165
--- /dev/null
+++ b/third_party/gmp/mpn/arm/logops_n.asm

@@ -0,0 +1,139 @@
+dnl  ARM mpn_and_n, mpn_andn_n. mpn_nand_n, etc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb             cycles/limb
+C          and andn ior xor         nand iorn nior xnor
+C StrongARM	 ?			 ?
+C XScale	 ?			 ?
+C Cortex-A7	 ?			 ?
+C Cortex-A8	 ?			 ?
+C Cortex-A9	2.5-2.72		2.75-3
+C Cortex-A15	2.25			2.75
+
+C TODO
+C  * It seems that 2.25 c/l and 2.75 c/l is possible for A9.
+C  * Debug popping issue, see comment below.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',    `mpn_and_n')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',    `mpn_andn_n')
+  define(`LOGOP',   `bic	$1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',    `mpn_nand_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',    `mpn_ior_n')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',    `mpn_iorn_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `bic	$1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',    `mpn_nior_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',    `mpn_xor_n')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',    `mpn_xnor_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{ r8, r9, r10 }
+	tst	n, #1
+	beq	L(skip1)
+	ldr	r10, [vp], #4
+	ldr	r12, [up], #4
+	LOGOP(	r12, r12, r10)
+	POSTOP(	r12)
+	str	r12, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	ldmia	vp!, { r10, r12 }
+	ldmia	up!, { r8, r9 }
+	LOGOP(	r8, r8, r10)
+	LOGOP(	r9, r9, r12)
+	POSTOP(	r8)
+	POSTOP(	r9)
+	stmia	rp!, { r8, r9 }
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+	push	{ r4, r5, r6, r7 }
+
+	ldmia	vp!, { r8, r9, r10, r12 }
+	b	L(mid)
+
+L(top):	ldmia	vp!, { r8, r9, r10, r12 }
+	POSTOP(	r4)
+	POSTOP(	r5)
+	POSTOP(	r6)
+	POSTOP(	r7)
+	stmia	rp!, { r4, r5, r6, r7 }
+L(mid):	sub	n, n, #4
+	ldmia	up!, { r4, r5, r6, r7 }
+	teq	n, #0
+	LOGOP(	r4, r4, r8)
+	LOGOP(	r5, r5, r9)
+	LOGOP(	r6, r6, r10)
+	LOGOP(	r7, r7, r12)
+	bne	L(top)
+
+	POSTOP(	r4)
+	POSTOP(	r5)
+	POSTOP(	r6)
+	POSTOP(	r7)
+	stmia	rp!, { r4, r5, r6, r7 }
+
+	pop	{ r4, r5, r6, r7 }	C popping r8-r10 here strangely fails
+
+L(rtn):	pop	{ r8, r9, r10 }
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/lshift.asm b/third_party/gmp/mpn/arm/lshift.asm
new file mode 100644
index 0000000..1d5ce0a
--- /dev/null
+++ b/third_party/gmp/mpn/arm/lshift.asm

@@ -0,0 +1,88 @@
+dnl  ARM mpn_lshift.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.5
+C Cortex-A15	 ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	add	up, up, n, lsl #2
+	push	{r4, r6, r7, r8}
+	ldr	r4, [up, #-4]!
+	add	rp, rp, n, lsl #2
+	rsb	tnc, cnt, #32
+
+	mov	r7, r4, lsl cnt
+	tst	n, #1
+	beq	L(evn)			C n even
+
+L(odd):	subs	n, n, #2
+	bcc	L(1)			C n = 1
+	ldr	r8, [up, #-4]!
+	b	L(mid)
+
+L(evn):	ldr	r6, [up, #-4]!
+	subs	n, n, #2
+	beq	L(end)
+
+L(top):	ldr	r8, [up, #-4]!
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(mid):	ldr	r6, [up, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r8, lsl cnt
+	subs	n, n, #2
+	bgt	L(top)
+
+L(end):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(1):	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc
+	pop	{r4, r6, r7, r8}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/lshiftc.asm b/third_party/gmp/mpn/arm/lshiftc.asm
new file mode 100644
index 0000000..e5b52df
--- /dev/null
+++ b/third_party/gmp/mpn/arm/lshiftc.asm

@@ -0,0 +1,95 @@
+dnl  ARM mpn_lshiftc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.0
+C Cortex-A15	 ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+	add	up, up, n, lsl #2
+	push	{r4, r6, r7, r8}
+	ldr	r4, [up, #-4]!
+	add	rp, rp, n, lsl #2
+	rsb	tnc, cnt, #32
+	mvn	r6, r4
+
+	mov	r7, r6, lsl cnt
+	tst	n, #1
+	beq	L(evn)			C n even
+
+L(odd):	subs	n, n, #2
+	bcc	L(1)			C n = 1
+	ldr	r8, [up, #-4]!
+	mvn	r8, r8
+	b	L(mid)
+
+L(evn):	ldr	r6, [up, #-4]!
+	mvn	r6, r6
+	subs	n, n, #2
+	beq	L(end)
+
+L(top):	ldr	r8, [up, #-4]!
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r8, r8
+	mov	r7, r6, lsl cnt
+L(mid):	ldr	r6, [up, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r6, r6
+	mov	r7, r8, lsl cnt
+	subs	n, n, #2
+	bgt	L(top)
+
+L(end):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(1):	mvn	r6, #0
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc
+	pop	{r4, r6, r7, r8}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/mod_34lsub1.asm b/third_party/gmp/mpn/arm/mod_34lsub1.asm
new file mode 100644
index 0000000..596cd3c
--- /dev/null
+++ b/third_party/gmp/mpn/arm/mod_34lsub1.asm

@@ -0,0 +1,124 @@
+dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A5	 2.67
+C Cortex-A7	 2.35
+C Cortex-A8	 2.0
+C Cortex-A9	 1.33
+C Cortex-A15	 1.33
+C Cortex-A17	 3.34
+C Cortex-A53	 2.0
+
+define(`ap',	r0)
+define(`n',	r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * Write cleverer summation code.
+C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+	push	{ r4, r5, r6, r7 }
+
+	subs	n, n, #3
+	mov	r7, #0
+	blt	L(le2)			C n <= 2
+
+	ldmia	ap!, { r2, r3, r12 }
+	subs	n, n, #3
+	blt	L(sum)			C n <= 5
+	cmn	r0, #0			C clear carry
+	sub	n, n, #3
+	b	L(mid)
+
+L(top):	adcs	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+L(mid):	ldmia	ap!, { r4, r5, r6 }
+	tst	n, n
+	sub	n, n, #3
+	bpl	L(top)
+
+	add	n, n, #3
+
+	adcs	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+	movcs	r7, #1			C r7 <= 1
+
+L(sum):	cmn	n, #2
+	movlo	r4, #0
+	ldrhs	r4, [ap], #4
+	movls	r5, #0
+	ldrhi	r5, [ap], #4
+
+	adds	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, #0
+	adc	r7, r7, #0		C r7 <= 2
+
+L(sum2):
+	bic	r0, r2, #0xff000000
+	add	r0, r0, r2, lsr #24
+	add	r0, r0, r7
+
+	mov	r7, r3, lsl #8
+	bic	r1, r7, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r3, lsr #16
+
+	mov	r7, r12, lsl #16
+	bic	r1, r7, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r12, lsr #8
+
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+
+L(le2):	cmn	n, #1
+	bne	L(1)
+	ldmia	ap!, { r2, r3 }
+	mov	r12, #0
+	b	L(sum2)
+L(1):	ldr	r2, [ap]
+	bic	r0, r2, #0xff000000
+	add	r0, r0, r2, lsr #24
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/mode1o.asm b/third_party/gmp/mpn/arm/mode1o.asm
new file mode 100644
index 0000000..63a7f36
--- /dev/null
+++ b/third_party/gmp/mpn/arm/mode1o.asm

@@ -0,0 +1,92 @@
+dnl  ARM mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	10
+C Cortex-A15	 9
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	-
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+	.protected	binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+	stmfd	sp!, {r4, r5}
+
+	LEA(	r4, binvert_limb_table)
+
+	ldr	r5, [up], #4		C up[0]
+
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]
+	mul	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, asl #1
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, asl #1	C r4 = inverse
+
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(end)
+
+L(top):	sbcs	cy, r5, cy
+	ldr	r5, [up], #4
+	sub	n, n, #1
+	mul	r12, r4, cy
+	tst	n, n
+	umull	r12, cy, d, r12
+	bne	L(top)
+
+L(end):	sbcs	cy, r5, cy
+	mul	r12, r4, cy
+	umull	r12, r0, d, r12
+	addcc	r0, r0, #1
+
+	ldmfd	sp!, {r4, r5}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/mul_1.asm b/third_party/gmp/mpn/arm/mul_1.asm
new file mode 100644
index 0000000..f7bc1bc
--- /dev/null
+++ b/third_party/gmp/mpn/arm/mul_1.asm

@@ -0,0 +1,94 @@
+dnl  ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+dnl  in a second limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	6-8
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.75
+C Cortex-A15	 ?
+
+C We should rewrite this along the lines of addmul_1.asm.  That should save a
+C cycle on StrongARM, and several cycles on XScale.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+	stmfd	sp!, { r8, r9, lr }
+	ands	r12, n, #1
+	beq	L(skip1)
+	ldr	lr, [up], #4
+	umull	r9, r12, lr, vl
+	str	r9, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	mov	r8, r12
+	ldmia	up!, { r12, lr }
+	mov	r9, #0
+	umlal	r8, r9, r12, vl
+	mov	r12, #0
+	umlal	r9, r12, lr, vl
+	stmia	rp!, { r8, r9 }
+L(skip2):
+	bics	n, n, #3
+	beq	L(rtn)
+	stmfd	sp!, { r6, r7 }
+
+L(top):	mov	r6, r12
+	ldmia	up!, { r8, r9, r12, lr }
+	ldr	r7, [rp, #12]			C cache allocate
+	mov	r7, #0
+	umlal	r6, r7, r8, vl
+	mov	r8, #0
+	umlal	r7, r8, r9, vl
+	mov	r9, #0
+	umlal	r8, r9, r12, vl
+	mov	r12, #0
+	umlal	r9, r12, lr, vl
+	subs	n, n, #4
+	stmia	rp!, { r6, r7, r8, r9 }
+	bne	L(top)
+
+	ldmfd	sp!, { r6, r7 }
+
+L(rtn):	mov	r0, r12
+	ldmfd	sp!, { r8, r9, pc }
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/neon/README b/third_party/gmp/mpn/arm/neon/README
new file mode 100644
index 0000000..79e3b48
--- /dev/null
+++ b/third_party/gmp/mpn/arm/neon/README

@@ -0,0 +1,2 @@
+This directory contains Neon code which runs and is efficient on all
+ARM CPUs which support Neon.

diff --git a/third_party/gmp/mpn/arm/neon/hamdist.asm b/third_party/gmp/mpn/arm/neon/hamdist.asm
new file mode 100644
index 0000000..2320896
--- /dev/null
+++ b/third_party/gmp/mpn/arm/neon/hamdist.asm

@@ -0,0 +1,194 @@
+dnl  ARM Neon mpn_hamdist -- mpn bit hamming distance.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.89
+C Cortex-A15	 0.95
+
+C TODO
+C  * Explore using vldr and vldm.  Does it help on A9?  (These loads do
+C    64-bits-at-a-time, which will mess up in big-endian mode.  Except not for
+C    popcount. Except perhaps also for popcount for the edge loads.)
+C  * Arrange to align the pointer, if that helps performance.  Use the same
+C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
+C    valgrind!)
+C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+C INPUT PARAMETERS
+define(`ap', r0)
+define(`bp', r1)
+define(`n',  r2)
+
+C We sum into 16 16-bit counters in q8,q9, but at the end we sum them and end
+C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/32 = 0x3fff limbs.  We use a chunksize close to that, but which
+C can be represented as a 8-bit ARM constant.
+C
+define(`chunksize',0x3f80)
+
+ASM_START()
+PROLOGUE(mpn_hamdist)
+
+	cmp	n, #chunksize
+	bhi	L(gt16k)
+
+L(lt16k):
+	vmov.i64   q8, #0		C clear summation register
+	vmov.i64   q9, #0		C clear summation register
+
+	tst	   n, #1
+	beq	   L(xxx0)
+	vmov.i64   d0, #0
+	vmov.i64   d20, #0
+	sub	   n, n, #1
+	vld1.32   {d0[0]}, [ap]!	C load 1 limb
+	vld1.32   {d20[0]}, [bp]!	C load 1 limb
+	veor	   d0, d0, d20
+	vcnt.8	   d24, d0
+	vpadal.u8  d16, d24		C d16/q8 = 0; could just splat
+
+L(xxx0):tst	   n, #2
+	beq	   L(xx00)
+	sub	   n, n, #2
+	vld1.32    {d0}, [ap]!		C load 2 limbs
+	vld1.32    {d20}, [bp]!		C load 2 limbs
+	veor	   d0, d0, d20
+	vcnt.8	   d24, d0
+	vpadal.u8  d16, d24
+
+L(xx00):tst	   n, #4
+	beq	   L(x000)
+	sub	   n, n, #4
+	vld1.32    {q0}, [ap]!		C load 4 limbs
+	vld1.32    {q10}, [bp]!		C load 4 limbs
+	veor	   q0, q0, q10
+	vcnt.8	   q12, q0
+	vpadal.u8  q8, q12
+
+L(x000):tst	   n, #8
+	beq	   L(0000)
+
+	subs	   n, n, #8
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
+	bls	   L(sum)
+
+L(gt8):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
+	veor	   q0, q0, q10
+	veor	   q1, q1, q11
+	sub	   n, n, #8
+	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	b	   L(mid)
+
+L(0000):subs	   n, n, #16
+	blo	   L(e0)
+
+	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
+	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
+	veor	   q2, q2, q14
+	veor	   q3, q3, q15
+	vcnt.8	   q12, q2
+	vcnt.8	   q13, q3
+	subs	   n, n, #16
+	blo	   L(end)
+
+L(top):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
+	veor	   q0, q0, q10
+	veor	   q1, q1, q11
+	vpadal.u8  q8, q12
+	vcnt.8	   q12, q0
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q1
+L(mid):	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
+	veor	   q2, q2, q14
+	veor	   q3, q3, q15
+	subs	   n, n, #16
+	vpadal.u8  q8, q12
+	vcnt.8	   q12, q2
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q3
+	bhs	   L(top)
+
+L(end):	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+L(sum):	veor	   q0, q0, q10
+	veor	   q1, q1, q11
+	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+	vadd.i16   q8, q8, q9
+					C we have 8 16-bit counts
+L(e0):	vpaddl.u16 q8, q8		C we have 4 32-bit counts
+	vpaddl.u32 q8, q8		C we have 2 64-bit counts
+	vmov.32    r0, d16[0]
+	vmov.32    r1, d17[0]
+	add	   r0, r0, r1
+	bx	lr
+
+C Code for large count.  Splits operand and calls above code.
+define(`ap2', r5)
+define(`bp2', r6)
+L(gt16k):
+	push	{r4,r5,r6,r14}
+	mov	ap2, ap
+	mov	bp2, bp
+	mov	r3, n			C full count
+	mov	r4, #0			C total sum
+
+1:	mov	n, #chunksize		C count for this invocation
+	bl	L(lt16k)		C could jump deep inside code
+	add	ap2, ap2, #chunksize*4	C point at next chunk
+	add	bp2, bp2, #chunksize*4	C point at next chunk
+	add	r4, r4, r0
+	mov	ap, ap2			C put chunk pointer in place for call
+	mov	bp, bp2			C put chunk pointer in place for call
+	sub	r3, r3, #chunksize
+	cmp	r3, #chunksize
+	bhi	1b
+
+	mov	n, r3			C count for final invocation
+	bl	L(lt16k)
+	add	r0, r4, r0
+	pop	{r4,r5,r6,pc}
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/neon/lorrshift.asm b/third_party/gmp/mpn/arm/neon/lorrshift.asm
new file mode 100644
index 0000000..7ebc780
--- /dev/null
+++ b/third_party/gmp/mpn/arm/neon/lorrshift.asm

@@ -0,0 +1,279 @@
+dnl  ARM Neon mpn_lshift and mpn_rshift.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb     cycles/limb     cycles/limb      good
+C              aligned	      unaligned	      best seen	     for cpu?
+C StrongARM	 -		 -
+C XScale	 -		 -
+C Cortex-A7	 ?		 ?
+C Cortex-A8	 ?		 ?
+C Cortex-A9	 3		 3				Y
+C Cortex-A15	 1.5		 1.5				Y
+
+
+C We read 64 bits at a time at 32-bit aligned addresses, and except for the
+C first and last store, we write using 64-bit aligned addresses.  All shifting
+C is done on 64-bit words in 'extension' registers.
+C
+C It should be possible to read also using 64-bit alignment, by manipulating
+C the shift count for unaligned operands.  Not done, since it does not seem to
+C matter for A9 or A15.
+C
+C This will not work in big-endian mode.
+
+C TODO
+C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
+C    which might make it tricky.
+C  * Clean up and simplify.
+C  * Consider sharing most of the code for lshift and rshift, since the feed-in
+C    code, the loop, and most of the wind-down code are identical.
+C  * Replace the basecase code with code using 'extension' registers.
+C  * Optimise.  It is not clear that this loop insn permutation is optimal for
+C    either A9 or A15.
+
+C INPUT PARAMETERS
+define(`rp',  `r0')
+define(`ap',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+
+ifdef(`OPERATION_lshift',`
+	define(`IFLSH', `$1')
+	define(`IFRSH', `')
+	define(`X',`0')
+	define(`Y',`1')
+	define(`func',`mpn_lshift')
+')
+ifdef(`OPERATION_rshift',`
+	define(`IFLSH', `')
+	define(`IFRSH', `$1')
+	define(`X',`1')
+	define(`Y',`0')
+	define(`func',`mpn_rshift')
+')
+
+MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
+
+ASM_START(neon)
+	TEXT
+	ALIGN(64)
+PROLOGUE(func)
+IFLSH(`	mov	r12, n, lsl #2	')
+IFLSH(`	add	rp, rp, r12	')
+IFLSH(`	add	ap, ap, r12	')
+
+	cmp	n, #4			C SIMD code n limit
+	ble	L(base)
+
+ifdef(`OPERATION_lshift',`
+	vdup.32	d6, r3			C left shift count is positive
+	sub	r3, r3, #64		C right shift count is negative
+	vdup.32	d7, r3
+	mov	r12, #-8')		C lshift pointer update offset
+ifdef(`OPERATION_rshift',`
+	rsb	r3, r3, #0		C right shift count is negative
+	vdup.32	d6, r3
+	add	r3, r3, #64		C left shift count is positive
+	vdup.32	d7, r3
+	mov	r12, #8')		C rshift pointer update offset
+
+IFLSH(`	sub	ap, ap, #8	')
+	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
+	vshl.u64 d18, d19, d7		C retval
+
+	tst	rp, #4			C is rp 64-bit aligned already?
+	beq	L(rp_aligned)		C yes, skip
+IFLSH(`	add	ap, ap, #4	')	C move back ap pointer
+IFRSH(`	sub	ap, ap, #4	')	C move back ap pointer
+	vshl.u64 d4, d19, d6
+	sub	n, n, #1		C first limb handled
+IFLSH(`	sub	 rp, rp, #4	')
+	vst1.32	 {d4[Y]}, [rp]IFRSH(!)	C store first limb, rp gets aligned
+	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
+
+L(rp_aligned):
+IFLSH(`	sub	rp, rp, #8	')
+	subs	n, n, #6
+	blt	L(two_or_three_more)
+	tst	n, #2
+	beq	L(2)
+
+L(1):	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d5, d19, d6
+	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	sub	n, n, #2
+	b	 L(mid)
+
+L(2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	subs	n, n, #4
+	blt	L(end)
+
+L(top):	vld1.32	 {d16}, [ap], r12
+	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+L(mid):	vld1.32	 {d17}, [ap], r12
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	subs	n, n, #4
+	bge	L(top)
+
+L(end):	tst	 n, #1
+	beq	 L(evn)
+
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+	b	 L(cj1)
+
+L(evn):	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vshl.u64 d16, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+	vorr	 d2, d5, d0
+	b	 L(cj2)
+
+C Load last 2 - 3 limbs, store last 4 - 5 limbs
+L(two_or_three_more):
+	tst	n, #1
+	beq	L(l2)
+
+L(l3):	vshl.u64 d5, d19, d6
+	vld1.32	 {d17}, [ap], r12
+L(cj1):	veor	 d16, d16, d16
+IFLSH(`	add	 ap, ap, #4	')
+	vld1.32	 {d16[Y]}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+IFLSH(`	add	 rp, rp, #4	')
+	vst1.32	 {d5[Y]}, [rp]
+	vmov.32	 r0, d18[X]
+	bx	lr
+
+L(l2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vshl.u64 d1, d16, d7
+	vshl.u64 d16, d16, d6
+	vorr	 d2, d4, d1
+L(cj2):	vst1.32	 {d2}, [rp:64], r12
+	vst1.32	 {d16}, [rp]
+	vmov.32	 r0, d18[X]
+	bx	lr
+
+
+define(`tnc', `r12')
+L(base):
+	push	{r4, r6, r7, r8}
+ifdef(`OPERATION_lshift',`
+	ldr	r4, [ap, #-4]!
+	rsb	tnc, cnt, #32
+
+	mov	r7, r4, lsl cnt
+	tst	n, #1
+	beq	L(ev)			C n even
+
+L(od):	subs	n, n, #2
+	bcc	L(ed1)			C n = 1
+	ldr	r8, [ap, #-4]!
+	b	L(md)			C n = 3
+
+L(ev):	ldr	r6, [ap, #-4]!
+	subs	n, n, #2
+	beq	L(ed)			C n = 3
+					C n = 4
+L(tp):	ldr	r8, [ap, #-4]!
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(md):	ldr	r6, [ap, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r8, lsl cnt
+
+L(ed):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(ed1):	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc
+')
+ifdef(`OPERATION_rshift',`
+	ldr	r4, [ap]
+	rsb	tnc, cnt, #32
+
+	mov	r7, r4, lsr cnt
+	tst	n, #1
+	beq	L(ev)			C n even
+
+L(od):	subs	n, n, #2
+	bcc	L(ed1)			C n = 1
+	ldr	r8, [ap, #4]!
+	b	L(md)			C n = 3
+
+L(ev):	ldr	r6, [ap, #4]!
+	subs	n, n, #2
+	beq	L(ed)			C n = 2
+					C n = 4
+
+L(tp):	ldr	r8, [ap, #4]!
+	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(md):	ldr	r6, [ap, #4]!
+	orr	r7, r7, r8, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r8, lsr cnt
+
+L(ed):	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(ed1):	str	r7, [rp], #4
+	mov	r0, r4, lsl tnc
+')
+	pop	{r4, r6, r7, r8}
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/neon/lshiftc.asm b/third_party/gmp/mpn/arm/neon/lshiftc.asm
new file mode 100644
index 0000000..f1bf0de
--- /dev/null
+++ b/third_party/gmp/mpn/arm/neon/lshiftc.asm

@@ -0,0 +1,242 @@
+dnl  ARM Neon mpn_lshiftc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb     cycles/limb     cycles/limb      good
+C              aligned	      unaligned	      best seen	     for cpu?
+C StrongARM	 -		 -
+C XScale	 -		 -
+C Cortex-A7	 ?		 ?
+C Cortex-A8	 ?		 ?
+C Cortex-A9	 3.5		 3.5				Y
+C Cortex-A15	 1.75		 1.75				Y
+
+
+C We read 64 bits at a time at 32-bit aligned addresses, and except for the
+C first and last store, we write using 64-bit aligned addresses.  All shifting
+C is done on 64-bit words in 'extension' registers.
+C
+C It should be possible to read also using 64-bit alignment, by manipulating
+C the shift count for unaligned operands.  Not done, since it does not seem to
+C matter for A9 or A15.
+C
+C This will not work in big-endian mode.
+
+C TODO
+C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
+C    which might make it tricky.
+C  * Clean up and simplify.
+C  * Consider sharing most of the code for lshift and rshift, since the feed-in
+C    code, the loop, and most of the wind-down code are identical.
+C  * Replace the basecase code with code using 'extension' registers.
+C  * Optimise.  It is not clear that this loop insn permutation is optimal for
+C    either A9 or A15.
+
+C INPUT PARAMETERS
+define(`rp',  `r0')
+define(`ap',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+
+ASM_START(neon)
+	TEXT
+	ALIGN(64)
+PROLOGUE(mpn_lshiftc)
+	mov	r12, n, lsl #2
+	add	rp, rp, r12
+	add	ap, ap, r12
+
+	cmp	n, #4			C SIMD code n limit
+	ble	L(base)
+
+	vdup.32	d6, r3			C left shift count is positive
+	sub	r3, r3, #64		C right shift count is negative
+	vdup.32	d7, r3
+	mov	r12, #-8		C lshift pointer update offset
+
+	sub	ap, ap, #8
+	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
+	vshl.u64 d18, d19, d7		C retval
+
+	tst	rp, #4			C is rp 64-bit aligned already?
+	beq	L(rp_aligned)		C yes, skip
+	vmvn	 d19, d19
+	add	ap, ap, #4		C move back ap pointer
+	vshl.u64 d4, d19, d6
+	sub	n, n, #1		C first limb handled
+	sub	 rp, rp, #4
+	vst1.32	 {d4[1]}, [rp]		C store first limb, rp gets aligned
+	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
+
+L(rp_aligned):
+	sub	rp, rp, #8
+	subs	n, n, #6
+	vmvn	 d19, d19
+	blt	L(two_or_three_more)
+	tst	n, #2
+	beq	L(2)
+
+L(1):	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d5, d19, d6
+	vmvn	 d17, d17
+	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	sub	n, n, #2
+	b	 L(mid)
+
+L(2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vmvn	 d16, d16
+	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	subs	n, n, #4
+	blt	L(end)
+
+L(top):	vmvn	 d17, d17
+	vld1.32	 {d16}, [ap], r12
+	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+L(mid):	vmvn	 d16, d16
+	vld1.32	 {d17}, [ap], r12
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	subs	n, n, #4
+	bge	L(top)
+
+L(end):	tst	 n, #1
+	beq	 L(evn)
+
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+	b	 L(cj1)
+
+L(evn):	vmvn	 d17, d17
+	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+	vmov.u8	 d17, #255
+	vorr	 d2, d5, d0
+	vshl.u64 d0, d17, d7
+	vorr	 d3, d4, d0
+	b	 L(cj2)
+
+C Load last 2 - 3 limbs, store last 4 - 5 limbs
+L(two_or_three_more):
+	tst	n, #1
+	beq	L(l2)
+
+L(l3):	vshl.u64 d5, d19, d6
+	vld1.32	 {d17}, [ap], r12
+L(cj1):	vmov.u8	 d16, #0
+	add	 ap, ap, #4
+	vmvn	 d17, d17
+	vld1.32	 {d16[1]}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vmvn	 d16, d16
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+	add	 rp, rp, #4
+	vst1.32	 {d5[1]}, [rp]
+	vmov.32	 r0, d18[0]
+	bx	lr
+
+L(l2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vmvn	 d16, d16
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vmov.u8	 d17, #255
+	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vorr	 d3, d5, d0
+L(cj2):	vst1.32	 {d2}, [rp:64], r12
+	vst1.32	 {d3}, [rp]
+	vmov.32	 r0, d18[0]
+	bx	lr
+
+
+define(`tnc', `r12')
+L(base):
+	push	{r4, r6, r7, r8}
+	ldr	r4, [ap, #-4]!
+	rsb	tnc, cnt, #32
+	mvn	r6, r4
+
+	mov	r7, r6, lsl cnt
+	tst	n, #1
+	beq	L(ev)			C n even
+
+L(od):	subs	n, n, #2
+	bcc	L(ed1)			C n = 1
+	ldr	r8, [ap, #-4]!
+	mvn	r8, r8
+	b	L(md)			C n = 3
+
+L(ev):	ldr	r6, [ap, #-4]!
+	mvn	r6, r6
+	subs	n, n, #2
+	beq	L(ed)			C n = 3
+					C n = 4
+L(tp):	ldr	r8, [ap, #-4]!
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r8, r8
+	mov	r7, r6, lsl cnt
+L(md):	ldr	r6, [ap, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r6, r6
+	mov	r7, r8, lsl cnt
+
+L(ed):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(ed1):	mvn	r6, #0
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc
+	pop	{r4, r6, r7, r8}
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/neon/popcount.asm b/third_party/gmp/mpn/arm/neon/popcount.asm
new file mode 100644
index 0000000..2f8f9af
--- /dev/null
+++ b/third_party/gmp/mpn/arm/neon/popcount.asm

@@ -0,0 +1,166 @@
+dnl  ARM Neon mpn_popcount -- mpn bit population count.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.125
+C Cortex-A15	 0.56
+
+C TODO
+C  * Explore using vldr and vldm.  Does it help on A9?  (These loads do
+C    64-bits-at-a-time, which will mess up in big-endian mode.  Except not for
+C    popcount. Except perhaps also for popcount for the edge loads.)
+C  * Arrange to align the pointer, if that helps performance.  Use the same
+C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
+C    valgrind!)
+C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+C INPUT PARAMETERS
+define(`ap', r0)
+define(`n',  r1)
+
+C We sum into 16 16-bit counters in q8,q9, but at the end we sum them and end
+C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/32 = 0x3fff limbs.  We use a chunksize close to that, but which
+C can be represented as a 8-bit ARM constant.
+C
+define(`chunksize',0x3f80)
+
+ASM_START()
+PROLOGUE(mpn_popcount)
+
+	cmp	n, #chunksize
+	bhi	L(gt16k)
+
+L(lt16k):
+	vmov.i64   q8, #0		C clear summation register
+	vmov.i64   q9, #0		C clear summation register
+
+	tst	   n, #1
+	beq	   L(xxx0)
+	vmov.i64   d0, #0
+	sub	   n, n, #1
+	vld1.32   {d0[0]}, [ap]!	C load 1 limb
+	vcnt.8	   d24, d0
+	vpadal.u8  d16, d24		C d16/q8 = 0; could just splat
+
+L(xxx0):tst	   n, #2
+	beq	   L(xx00)
+	sub	   n, n, #2
+	vld1.32    {d0}, [ap]!		C load 2 limbs
+	vcnt.8	   d24, d0
+	vpadal.u8  d16, d24
+
+L(xx00):tst	   n, #4
+	beq	   L(x000)
+	sub	   n, n, #4
+	vld1.32    {q0}, [ap]!		C load 4 limbs
+	vcnt.8	   q12, q0
+	vpadal.u8  q8, q12
+
+L(x000):tst	   n, #8
+	beq	   L(0000)
+
+	subs	   n, n, #8
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	bls	   L(sum)
+
+L(gt8):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	sub	   n, n, #8
+	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	b	   L(mid)
+
+L(0000):subs	   n, n, #16
+	blo	   L(e0)
+
+	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vcnt.8	   q12, q2
+	vcnt.8	   q13, q3
+	subs	   n, n, #16
+	blo	   L(end)
+
+L(top):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vpadal.u8  q8, q12
+	vcnt.8	   q12, q0
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q1
+L(mid):	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	subs	   n, n, #16
+	vpadal.u8  q8, q12
+	vcnt.8	   q12, q2
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q3
+	bhs	   L(top)
+
+L(end):	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+L(sum):	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+	vadd.i16   q8, q8, q9
+					C we have 8 16-bit counts
+L(e0):	vpaddl.u16 q8, q8		C we have 4 32-bit counts
+	vpaddl.u32 q8, q8		C we have 2 64-bit counts
+	vmov.32    r0, d16[0]
+	vmov.32    r1, d17[0]
+	add	   r0, r0, r1
+	bx	lr
+
+C Code for large count.  Splits operand and calls above code.
+define(`ap2', r2)			C caller-saves reg not used above
+L(gt16k):
+	push	{r4,r14}
+	mov	ap2, ap
+	mov	r3, n			C full count
+	mov	r4, #0			C total sum
+
+1:	mov	n, #chunksize		C count for this invocation
+	bl	L(lt16k)		C could jump deep inside code
+	add	ap2, ap2, #chunksize*4	C point at next chunk
+	add	r4, r4, r0
+	mov	ap, ap2			C put chunk pointer in place for call
+	sub	r3, r3, #chunksize
+	cmp	r3, #chunksize
+	bhi	1b
+
+	mov	n, r3			C count for final invocation
+	bl	L(lt16k)
+	add	r0, r4, r0
+	pop	{r4,pc}
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/neon/sec_tabselect.asm b/third_party/gmp/mpn/arm/neon/sec_tabselect.asm
new file mode 100644
index 0000000..69fceb0
--- /dev/null
+++ b/third_party/gmp/mpn/arm/neon/sec_tabselect.asm

@@ -0,0 +1,140 @@
+dnl  ARM Neon mpn_sec_tabselect.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.15
+C Cortex-A15	 0.65
+
+define(`rp',     `r0')
+define(`tp',     `r1')
+define(`n',      `r2')
+define(`nents',  `r3')
+C define(`which',  on stack)
+
+define(`i',      `r4')
+define(`j',      `r5')
+
+define(`maskq',  `q10')
+define(`maskd',  `d20')
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+	push	{r4-r5}
+
+	add	  r4, sp, #8
+	vld1.32	  {d30[], d31[]}, [r4]	C 4 `which' copies
+	vmov.i32  q14, #1		C 4 copies of 1
+
+	subs	j, n, #8
+	bmi	L(outer_end)
+
+L(outer_top):
+	mov	  i, nents
+	mov	  r12, tp		C preserve tp
+	veor	  q13, q13, q13		C 4 counter copies
+	veor	  q2, q2, q2
+	veor	  q3, q3, q3
+	ALIGN(16)
+L(top):	vceq.i32  maskq, q13, q15	C compare idx copies to `which' copies
+	vld1.32	  {q0,q1}, [tp]
+	vadd.i32  q13, q13, q14
+	vbit	  q2, q0, maskq
+	vbit	  q3, q1, maskq
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(top)
+	vst1.32	  {q2,q3}, [rp]!
+	add	  tp, r12, #32		C restore tp, point to next slice
+	subs	  j, j, #8
+	bpl	  L(outer_top)
+L(outer_end):
+
+	tst	  n, #4
+	beq	  L(b0xx)
+L(b1xx):mov	  i, nents
+	mov	  r12, tp
+	veor	  q13, q13, q13
+	veor	  q2, q2, q2
+	ALIGN(16)
+L(tp4):	vceq.i32  maskq, q13, q15
+	vld1.32	  {q0}, [tp]
+	vadd.i32  q13, q13, q14
+	vbit	  q2, q0, maskq
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(tp4)
+	vst1.32	  {q2}, [rp]!
+	add	  tp, r12, #16
+
+L(b0xx):tst	  n, #2
+	beq	  L(b00x)
+L(b01x):mov	  i, nents
+	mov	  r12, tp
+	veor	  d26, d26, d26
+	veor	  d4, d4, d4
+	ALIGN(16)
+L(tp2):	vceq.i32  maskd, d26, d30
+	vld1.32	  {d0}, [tp]
+	vadd.i32  d26, d26, d28
+	vbit	  d4, d0, maskd
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(tp2)
+	vst1.32	  {d4}, [rp]!
+	add	  tp, r12, #8
+
+L(b00x):tst	  n, #1
+	beq	  L(b000)
+L(b001):mov	  i, nents
+	mov	  r12, tp
+	veor	  d26, d26, d26
+	veor	  d4, d4, d4
+	ALIGN(16)
+L(tp1):	vceq.i32  maskd, d26, d30
+	vld1.32	  {d0[0]}, [tp]
+	vadd.i32  d26, d26, d28
+	vbit	  d4, d0, maskd
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(tp1)
+	vst1.32	  {d4[0]}, [rp]
+
+L(b000):pop	{r4-r5}
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/rsh1aors_n.asm b/third_party/gmp/mpn/arm/rsh1aors_n.asm
new file mode 100644
index 0000000..f2e3006
--- /dev/null
+++ b/third_party/gmp/mpn/arm/rsh1aors_n.asm

@@ -0,0 +1,124 @@
+dnl  ARM mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	3.64-3.7
+C Cortex-A15	 2.5
+
+C TODO
+C  * Not optimised.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`RSTCY',	`cmn	$1, $1')
+  define(`func',	mpn_rsh1add_n)
+  define(`func_nc',	mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`RSTCY',
+	`mvn	$2, #0x80000000
+	cmp	$2, $1')
+  define(`func',	mpn_rsh1sub_n)
+  define(`func_nc',	mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{r4-r11}
+	ldr	r4, [up], #4
+	ldr	r8, [vp], #4
+	ADDSUB	r4, r4, r8
+	movs	r12, r7, rrx
+	and	r11, r4, #1	C return value
+	subs	n, n, #4
+	blo	L(end)
+
+L(top):	ldmia	up!, {r5,r6,r7}
+	ldmia	vp!, {r8,r9,r10}
+	cmn	r12, r12
+	ADDSUBC	r5, r5, r8
+	ADDSUBC	r6, r6, r9
+	ADDSUBC	r7, r7, r10
+	movs	r12, r7, rrx
+	movs	r6, r6, rrx
+	movs	r5, r5, rrx
+	movs	r4, r4, rrx
+	subs	n, n, #3
+	stmia	rp!, {r4,r5,r6}
+	mov	r4, r7
+	bhs	L(top)
+
+L(end):	cmn	n, #2
+	bls	L(e2)
+	ldm	up, {r5,r6}
+	ldm	vp, {r8,r9}
+	cmn	r12, r12
+	ADDSUBC	r5, r5, r8
+	ADDSUBC	r6, r6, r9
+	movs	r12, r6, rrx
+	movs	r5, r5, rrx
+	movs	r4, r4, rrx
+	stmia	rp!, {r4,r5}
+	mov	r4, r6
+	b	L(e1)
+
+L(e2):	bne	L(e1)
+	ldr	r5, [up, #0]
+	ldr	r8, [vp, #0]
+	cmn	r12, r12
+	ADDSUBC	r5, r5, r8
+	movs	r12, r5, rrx
+	movs	r4, r4, rrx
+	str	r4, [rp], #4
+	mov	r4, r5
+
+L(e1):	RSTCY(	r12, r1)
+	mov	r4, r4, rrx
+	str	r4, [rp, #0]
+	mov	r0, r11
+	pop	{r4-r11}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/rshift.asm b/third_party/gmp/mpn/arm/rshift.asm
new file mode 100644
index 0000000..9ddbc2e
--- /dev/null
+++ b/third_party/gmp/mpn/arm/rshift.asm

@@ -0,0 +1,86 @@
+dnl  ARM mpn_rshift.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.5
+C Cortex-A15	 ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	push	{r4, r6, r7, r8}
+	ldr	r4, [up]
+	rsb	tnc, cnt, #32
+
+	mov	r7, r4, lsr cnt
+	tst	n, #1
+	beq	L(evn)			C n even
+
+L(odd):	subs	n, n, #2
+	bcc	L(1)			C n = 1
+	ldr	r8, [up, #4]!
+	b	L(mid)
+
+L(evn):	ldr	r6, [up, #4]!
+	subs	n, n, #2
+	beq	L(end)
+
+L(top):	ldr	r8, [up, #4]!
+	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(mid):	ldr	r6, [up, #4]!
+	orr	r7, r7, r8, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r8, lsr cnt
+	subs	n, n, #2
+	bgt	L(top)
+
+L(end):	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(1):	str	r7, [rp]
+	mov	r0, r4, lsl tnc
+	pop	{r4, r6, r7, r8}
+	return	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/sec_tabselect.asm b/third_party/gmp/mpn/arm/sec_tabselect.asm
new file mode 100644
index 0000000..76a412b
--- /dev/null
+++ b/third_party/gmp/mpn/arm/sec_tabselect.asm

@@ -0,0 +1,131 @@
+dnl  ARM mpn_sec_tabselect
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.33
+C Cortex-A15	 2.2
+
+C TODO
+C  * Consider using special code for small nents, either swapping the inner and
+C    outer loops, or providing a few completely unrolling the inner loops.
+
+define(`rp',    `r0')
+define(`tp',    `r1')
+define(`n',     `r2')
+define(`nents', `r3')
+C      which  on stack
+
+define(`i',     `r11')
+define(`j',     `r12')
+define(`c',     `r14')
+define(`mask',  `r7')
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+	push	{r4-r11, r14}
+
+	subs	j, n, #3
+	bmi	L(outer_end)
+L(outer_top):
+	ldr	c, [sp, #36]
+	mov	i, nents
+	push	{tp}
+
+	mov	r8, #0
+	mov	r9, #0
+	mov	r10, #0
+
+L(top):	subs	c, c, #1
+	ldm	tp, {r4,r5,r6}
+	sbc	mask, mask, mask
+	subs	i, i, #1
+	add	tp, tp, n, lsl #2
+	and	r4, r4, mask
+	and	r5, r5, mask
+	and	r6, r6, mask
+	orr	r8, r8, r4
+	orr	r9, r9, r5
+	orr	r10, r10, r6
+	bge	L(top)
+
+	stmia	rp!, {r8,r9,r10}
+	pop	{tp}
+	add	tp, tp, #12
+	subs	j, j, #3
+	bpl	L(outer_top)
+L(outer_end):
+
+	cmp	j, #-1
+	bne	L(n2)
+
+	ldr	c, [sp, #36]
+	mov	i, nents
+	mov	r8, #0
+	mov	r9, #0
+L(tp2):	subs	c, c, #1
+	sbc	mask, mask, mask
+	ldm	tp, {r4,r5}
+	subs	i, i, #1
+	add	tp, tp, n, lsl #2
+	and	r4, r4, mask
+	and	r5, r5, mask
+	orr	r8, r8, r4
+	orr	r9, r9, r5
+	bge	L(tp2)
+	stmia	rp, {r8,r9}
+	pop	{r4-r11, r14}
+	return	lr
+
+L(n2):	cmp	j, #-2
+	bne	L(n1)
+
+	ldr	c, [sp, #36]
+	mov	i, nents
+	mov	r8, #0
+L(tp1):	subs	c, c, #1
+	sbc	mask, mask, mask
+	ldr	r4, [tp]
+	subs	i, i, #1
+	add	tp, tp, n, lsl #2
+	and	r4, r4, mask
+	orr	r8, r8, r4
+	bge	L(tp1)
+	str	r8, [rp]
+L(n1):	pop	{r4-r11, r14}
+	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/udiv.asm b/third_party/gmp/mpn/arm/udiv.asm
new file mode 100644
index 0000000..7c04789
--- /dev/null
+++ b/third_party/gmp/mpn/arm/udiv.asm

@@ -0,0 +1,104 @@
+dnl  ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.
+dnl  Return quotient and store remainder through a supplied pointer.
+
+dnl  Copyright 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rem_ptr',`r0')
+define(`n1',`r1')
+define(`n0',`r2')
+define(`d',`r3')
+
+C divstep -- develop one quotient bit.  Dividend in $1$2, divisor in $3.
+C Quotient bit is shifted into $2.
+define(`divstep',
+       `adcs	$2, $2, $2
+	adc	$1, $1, $1
+	cmp	$1, $3
+	subcs	$1, $1, $3')
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+	mov	r12, #8			C loop counter for both loops below
+	cmp	d, #0x80000000		C check divisor msb and clear carry
+	bcs	L(_large_divisor)
+
+L(oop):	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	sub	r12, r12, #1
+	teq	r12, #0
+	bne	L(oop)
+
+	str	n1, [rem_ptr]		C store remainder
+	adc	r0, n0, n0		C quotient: add last carry from divstep
+	return	lr
+
+L(_large_divisor):
+	stmfd	sp!, { r8, lr }
+
+	and	r8, n0, #1		C save lsb of dividend
+	mov	lr, n1, lsl #31
+	orrs	n0, lr, n0, lsr #1	C n0 = lo(n1n0 >> 1)
+	mov	n1, n1, lsr #1		C n1 = hi(n1n0 >> 1)
+
+	and	lr, d, #1		C save lsb of divisor
+	movs	d, d, lsr #1		C d = floor(orig_d / 2)
+	adc	d, d, #0		C d = ceil(orig_d / 2)
+
+L(oop2):
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	sub	r12, r12, #1
+	teq	r12, #0
+	bne	L(oop2)
+
+	adc	n0, n0, n0		C shift and add last carry from divstep
+	add	n1, r8, n1, lsl #1	C shift in omitted dividend lsb
+	tst	lr, lr			C test saved divisor lsb
+	beq	L(_even_divisor)
+
+	rsb	d, lr, d, lsl #1	C restore orig d value
+	adds	n1, n1, n0		C fix remainder for omitted divisor lsb
+	addcs	n0, n0, #1		C adjust quotient if rem. fix carried
+	subcs	n1, n1, d		C adjust remainder accordingly
+	cmp	n1, d			C remainder >= divisor?
+	subcs	n1, n1, d		C adjust remainder
+	addcs	n0, n0, #1		C adjust quotient
+
+L(_even_divisor):
+	str	n1, [rem_ptr]		C store remainder
+	mov	r0, n0			C quotient
+	ldmfd	sp!, { r8, pc }
+EPILOGUE(mpn_udiv_qrnnd)

diff --git a/third_party/gmp/mpn/arm/v5/gcd_11.asm b/third_party/gmp/mpn/arm/v5/gcd_11.asm
new file mode 100644
index 0000000..3c2b48f
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v5/gcd_11.asm

@@ -0,0 +1,70 @@
+dnl  ARM v5 mpn_gcd_11.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A5	 6.45	obsolete
+C Cortex-A7	 6.41	obsolete
+C Cortex-A8	 5.0	obsolete
+C Cortex-A9	 5.9	obsolete
+C Cortex-A15	 4.40	obsolete
+C Cortex-A17	 5.68	obsolete
+C Cortex-A53	 4.37	obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0',    `r0')
+define(`v0',    `r1')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+	subs	r3, u0, v0	C			0
+	beq	L(end)		C
+
+	ALIGN(16)
+L(top):	sub	r2, v0, u0	C			0,5
+	and	r12, r2, r3	C			1
+	clz	r12, r12	C			2
+	rsb	r12, r12, #31	C			3
+	rsbcc	r3, r3, #0	C v = abs(u-v), even	1
+	movcs	u0, v0		C u = min(u,v)		1
+	lsr	v0, r3, r12	C			4
+	subs	r3, u0, v0	C			5
+	bne	L(top)		C
+
+L(end):	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v5/mod_1_1.asm b/third_party/gmp/mpn/arm/v5/mod_1_1.asm
new file mode 100644
index 0000000..3cf0cd7
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v5/mod_1_1.asm

@@ -0,0 +1,129 @@
+dnl  ARM mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 7
+C Cortex-A15	 6
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)
+	push	{r4-r10}
+	add	r0, r0, r1, asl #2
+	ldr	r5, [r0, #-4]!
+	ldr	r12, [r0, #-4]!
+	subs	r1, r1, #2
+	ble	L(4)
+	ldr	r8, [r3, #12]
+	mov	r4, r12
+	mov	r10, r5
+	umull	r7, r5, r10, r8
+	sub	r1, r1, #1
+	b	L(mid)
+
+L(top):	adds	r12, r6, r7
+	adcs	r10, r4, r5
+	sub	r1, r1, #1
+	mov	r6, #0
+	movcs	r6, r8
+	umull	r7, r5, r10, r8
+	adds	r4, r12, r6
+	subcs	r4, r4, r2
+L(mid):	ldr	r6, [r0, #-4]!
+	teq	r1, #0
+	bne	L(top)
+
+	adds	r12, r6, r7
+	adcs	r5, r4, r5
+	subcs	r5, r5, r2
+L(4):	ldr	r1, [r3, #4]
+	cmp	r1, #0
+	beq	L(7)
+	ldr	r4, [r3, #8]
+	umull	r0, r6, r5, r4
+	adds	r12, r0, r12
+	addcs	r6, r6, #1
+	rsb	r0, r1, #32
+	mov	r0, r12, lsr r0
+	orr	r5, r0, r6, asl r1
+	mov	r12, r12, asl r1
+	b	L(8)
+L(7):	cmp	r5, r2
+	subcs	r5, r5, r2
+L(8):	ldr	r0, [r3, #0]
+	umull	r4, r3, r5, r0
+	add	r5, r5, #1
+	adds	r0, r4, r12
+	adc	r5, r3, r5
+	mul	r5, r2, r5
+	sub	r12, r12, r5
+	cmp	r12, r0
+	addhi	r12, r12, r2
+	cmp	r2, r12
+	subls	r12, r12, r2
+	mov	r0, r12, lsr r1
+	pop	{r4-r10}
+	bx	r14
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+	stmfd	sp!, {r4, r5, r6, r14}
+	mov	r5, r0
+	clz	r4, r1
+	mov	r0, r1, asl r4
+	rsb	r6, r0, #0
+	bl	mpn_invert_limb
+	str	r0, [r5, #0]
+	str	r4, [r5, #4]
+	cmp	r4, #0
+	beq	L(2)
+	rsb	r1, r4, #32
+	mov	r3, #1
+	mov	r3, r3, asl r4
+	orr	r3, r3, r0, lsr r1
+	mul	r3, r6, r3
+	mov	r4, r3, lsr r4
+	str	r4, [r5, #8]
+L(2):	mul	r0, r6, r0
+	str	r0, [r5, #12]
+	ldmfd	sp!, {r4, r5, r6, pc}
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v5/mod_1_2.asm b/third_party/gmp/mpn/arm/v5/mod_1_2.asm
new file mode 100644
index 0000000..aa26ecb
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v5/mod_1_2.asm

@@ -0,0 +1,156 @@
+dnl  ARM mpn_mod_1s_2p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.25
+C Cortex-A15	 3
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p)
+	push	{r4-r10}
+	tst	n, #1
+	add	r7, r3, #8
+	ldmia	r7, {r7, r8, r12}	C load B1, B2, B3
+	add	ap, ap, n, lsl #2	C put ap at operand end
+	beq	L(evn)
+
+L(odd):	subs	n, n, #1
+	beq	L(1)
+	ldmdb	ap!, {r4,r6,r9}
+	mov	r10, #0
+	umlal	r4, r10, r6, r7
+	umlal	r4, r10, r9, r8
+	b	L(com)
+
+L(evn):	ldmdb	ap!, {r4,r10}
+L(com):	subs	n, n, #2
+	ble	L(end)
+	ldmdb	ap!, {r5,r6}
+	b	L(mid)
+
+L(top):	mov	r9, #0
+	umlal	r5, r9, r6, r7		C B1
+	umlal	r5, r9, r4, r8		C B2
+	ldmdb	ap!, {r4,r6}
+	umlal	r5, r9, r10, r12	C B3
+	ble	L(xit)
+	mov	r10, #0
+	umlal	r4, r10, r6, r7		C B1
+	umlal	r4, r10, r5, r8		C B2
+	ldmdb	ap!, {r5,r6}
+	umlal	r4, r10, r9, r12	C B3
+L(mid):	subs	n, n, #4
+	bge	L(top)
+
+	mov	r9, #0
+	umlal	r5, r9, r6, r7		C B1
+	umlal	r5, r9, r4, r8		C B2
+	umlal	r5, r9, r10, r12	C B3
+	mov	r4, r5
+
+L(end):	movge	   r9, r10		C executed iff coming via xit
+	ldr	r6, [r3, #4]		C cps[1] = cnt
+	mov	r5, #0
+	umlal	r4, r5, r9, r7
+	mov	r7, r5, lsl r6
+L(x):	rsb	r1, r6, #32
+	orr	r8, r7, r4, lsr r1
+	mov	r9, r4, lsl r6
+	ldr	r5, [r3, #0]
+	add	r0, r8, #1
+	umull	r12, r1, r8, r5
+	adds	r4, r12, r9
+	adc	r1, r1, r0
+	mul	r5, r2, r1
+	sub	r9, r9, r5
+	cmp	r9, r4
+	addhi	r9, r9, r2
+	cmp	r2, r9
+	subls	r9, r9, r2
+	mov	r0, r9, lsr r6
+	pop	{r4-r10}
+	bx	r14
+
+L(xit):	mov	r10, #0
+	umlal	r4, r10, r6, r7		C B1
+	umlal	r4, r10, r5, r8		C B2
+	umlal	r4, r10, r9, r12	C B3
+	b	L(end)
+
+L(1):	ldr	r6, [r3, #4]		C cps[1] = cnt
+	ldr	r4, [ap, #-4]		C ap[0]
+	mov	r7, #0
+	b	L(x)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_2p_cps)
+	push	{r4-r8, r14}
+	clz	r4, r1
+	mov	r5, r1, lsl r4		C b <<= cnt
+	mov	r6, r0			C r6 = cps
+	mov	r0, r5
+	bl	mpn_invert_limb
+	rsb	r3, r4, #32
+	mov	r3, r0, lsr r3
+	mov	r2, #1
+	orr	r3, r3, r2, lsl r4
+	rsb	r1, r5, #0
+	mul	r2, r1, r3
+	umull	r3, r12, r2, r0
+	add	r12, r2, r12
+	mvn	r12, r12
+	mul	r1, r5, r12
+	cmp	r1, r3
+	addhi	r1, r1, r5
+	umull	r12, r7, r1, r0
+	add	r7, r1, r7
+	mvn	r7, r7
+	mul	r3, r5, r7
+	cmp	r3, r12
+	addhi	r3, r3, r5
+	mov	r5, r2, lsr r4
+	mov	r7, r1, lsr r4
+	mov	r8, r3, lsr r4
+	stmia	r6, {r0,r4,r5,r7,r8}	C fill cps
+	pop	{r4-r8, pc}
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/addmul_1.asm b/third_party/gmp/mpn/arm/v6/addmul_1.asm
new file mode 100644
index 0000000..a38af58
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/addmul_1.asm

@@ -0,0 +1,112 @@
+dnl  ARM mpn_addmul_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 6.4
+C Cortex-A7	 5.25
+C Cortex-A8	 7
+C Cortex-A9	 3.25
+C Cortex-A15	 4
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+	ands	r6, n, #3
+	mov	r12, #0
+	beq	L(fi0)
+	cmp	r6, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	ldr	r4, [up], #4
+	ldr	r6, [rp, #0]
+	ldr	r5, [up], #4
+	b	L(lo3)
+
+L(fi0):	ldr	r5, [up], #4
+	ldr	r7, [rp], #4
+	ldr	r4, [up], #4
+	b	L(lo0)
+
+L(fi1):	ldr	r4, [up], #4
+	ldr	r6, [rp], #8
+	subs	n, n, #1
+	beq	L(1)
+	ldr	r5, [up], #4
+	b	L(lo1)
+
+L(fi2):	ldr	r5, [up], #4
+	ldr	r7, [rp], #12
+	ldr	r4, [up], #4
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	ldr	r6, [rp, #-8]
+	ldr	r5, [up], #4
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0
+	ldr	r7, [rp, #-4]
+	ldr	r4, [up], #4
+	str	r6, [rp, #-8]
+L(lo0):	umaal	r7, r12, r5, v0
+	ldr	r6, [rp, #0]
+	ldr	r5, [up], #4
+	str	r7, [rp, #-4]
+L(lo3):	umaal	r6, r12, r4, v0
+	ldr	r7, [rp, #4]
+	ldr	r4, [up], #4
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	subs	n, n, #4
+	bhi	L(top)
+
+	ldr	r6, [rp, #-8]
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
+	str	r6, [rp, #-8]
+	mov	r0, r12
+	ldmfd	sp!, { r4, r5, r6, r7 }
+	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/addmul_2.asm b/third_party/gmp/mpn/arm/v6/addmul_2.asm
new file mode 100644
index 0000000..69d0b8f
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/addmul_2.asm

@@ -0,0 +1,125 @@
+dnl  ARM mpn_addmul_2.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2015 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 4.68
+C Cortex-A5	 3.63
+C Cortex-A7	 3.65
+C Cortex-A8	 4.0
+C Cortex-A9	 2.25
+C Cortex-A15	 2.5
+C Cortex-A17	 2.13
+C Cortex-A53	 3.5
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_2)
+	push	{ r4-r9 }
+
+	ldrd	v0, v1, [vp, #0]
+	mov	cya, #0
+	mov	cyb, #0
+
+	tst	n, #1
+	beq	L(evn)
+
+L(odd):	ldr	u1, [up, #0]
+	ldr	r4, [rp, #0]
+	tst	n, #2
+	beq	L(fi1)
+L(fi3):	sub	up, up, #8
+	sub	rp, rp, #8
+	b	L(lo3)
+L(fi1):	sub	n, n, #1
+	b	L(top)
+
+L(evn):	ldr	u0, [up, #0]
+	ldr	r5, [rp, #0]
+	tst	n, #2
+	bne	L(fi2)
+L(fi0):	sub	up, up, #4
+	sub	rp, rp, #4
+	b	L(lo0)
+L(fi2):	sub	up, up, #12
+	sub	rp, rp, #12
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+L(lo0):	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+L(lo3):	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+L(lo2):	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	n, n, #4
+	bhi	L(top)
+
+L(end):	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	mov	r0, cyb
+
+	pop	{ r4-r9 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/addmul_3.asm b/third_party/gmp/mpn/arm/v6/addmul_3.asm
new file mode 100644
index 0000000..d1490cd
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/addmul_3.asm

@@ -0,0 +1,191 @@
+dnl  ARM mpn_addmul_3.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 4.33
+C Cortex-A5	 3.28
+C Cortex-A7	 3.25
+C Cortex-A8	 3.17
+C Cortex-A9	 2.125
+C Cortex-A15	 2
+C Cortex-A17	 2.11
+C Cortex-A53	 4.18
+
+C TODO
+C  * Use a fast path for n <= KARATSUBA_MUL_THRESHOLD using a jump table,
+C    avoiding the current multiply.
+C  * Start the first multiply or multiplies early.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r4')  define(`v1',`r5')  define(`v2',`r6')
+define(`u0',`r3')  define(`u1',`r14')
+define(`w0',`r7')  define(`w1',`r8')  define(`w2',`r9')
+define(`cy0',`r10')  define(`cy1',`r11') define(`cy2',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_3)
+	push	{ r4-r11, r14 }
+
+	ldr	w0, =0xaaaaaaab		C 3^{-1} mod 2^32
+	ldm	vp, { v0,v1,v2 }
+	mov	cy0, #0
+	mov	cy1, #0
+	mov	cy2, #0
+
+C Tricky n mod 6
+	mul	w0, w0, n		C n * 3^{-1} mod 2^32
+	and	w0, w0, #0xc0000001	C pseudo-CRT mod 3,2
+	sub	n, n, #3
+ifdef(`PIC',`
+	add	pc, pc, w0, ror $28
+	nop
+	b	L(b0)
+	b	L(b2)
+	b	L(b4)
+	.word	0xe7f000f0	C udf
+	b	L(b3)
+	b	L(b5)
+	b	L(b1)
+',`
+	ldr	pc, [pc, w0, ror $28]
+	nop
+	.word	L(b0), L(b2), L(b4), 0, L(b3), L(b5), L(b1)
+')
+
+L(b5):	add	up, up, #-8
+	ldr	w1, [rp, #0]
+	ldr	w2, [rp, #4]
+	ldr	u1, [up, #8]
+	b	L(lo5)
+
+L(b4):	add	rp, rp, #-4
+	add	up, up, #-12
+	ldr	w2, [rp, #4]
+	ldr	w0, [rp, #8]
+	ldr	u0, [up, #12]
+	b	L(lo4)
+
+L(b3):	add	rp, rp, #-8
+	add	up, up, #-16
+	ldr	w0, [rp, #8]
+	ldr	w1, [rp, #12]
+	ldr	u1, [up, #16]
+	b	L(lo3)
+
+L(b1):	add	rp, rp, #8
+	ldr	w2, [rp, #-8]
+	ldr	w0, [rp, #-4]
+	ldr	u1, [up, #0]
+	b	L(lo1)
+
+L(b0):	add	rp, rp, #4
+	add	up, up, #-4
+	ldr	w0, [rp, #-4]
+	ldr	w1, [rp, #0]
+	ldr	u0, [up, #4]
+	b	L(lo0)
+
+L(b2):	add	rp, rp, #12
+	add	up, up, #4
+	ldr	w1, [rp, #-12]
+	ldr	w2, [rp, #-8]
+	ldr	u0, [up, #-4]
+
+	ALIGN(16)
+L(top):	ldr	w0, [rp, #-4]
+	umaal	w1, cy0, u0, v0
+	ldr	u1, [up, #0]
+	umaal	w2, cy1, u0, v1
+	str	w1, [rp, #-12]
+	umaal	w0, cy2, u0, v2
+L(lo1):	ldr	w1, [rp, #0]
+	umaal	w2, cy0, u1, v0
+	ldr	u0, [up, #4]
+	umaal	w0, cy1, u1, v1
+	str	w2, [rp, #-8]
+	umaal	w1, cy2, u1, v2
+L(lo0):	ldr	w2, [rp, #4]
+	umaal	w0, cy0, u0, v0
+	ldr	u1, [up, #8]
+	umaal	w1, cy1, u0, v1
+	str	w0, [rp, #-4]
+	umaal	w2, cy2, u0, v2
+L(lo5):	ldr	w0, [rp, #8]
+	umaal	w1, cy0, u1, v0
+	ldr	u0, [up, #12]
+	umaal	w2, cy1, u1, v1
+	str	w1, [rp, #0]
+	umaal	w0, cy2, u1, v2
+L(lo4):	ldr	w1, [rp, #12]
+	umaal	w2, cy0, u0, v0
+	ldr	u1, [up, #16]
+	umaal	w0, cy1, u0, v1
+	str	w2, [rp, #4]
+	umaal	w1, cy2, u0, v2
+L(lo3):	ldr	w2, [rp, #16]
+	umaal	w0, cy0, u1, v0
+	ldr	u0, [up, #20]
+	umaal	w1, cy1, u1, v1
+	str	w0, [rp, #8]
+	umaal	w2, cy2, u1, v2
+L(lo2):	subs	n, n, #6
+	add	up, up, #24
+	add	rp, rp, #24
+	bge	L(top)
+
+L(end):	umaal	w1, cy0, u0, v0
+	ldr	u1, [up, #0]
+	umaal	w2, cy1, u0, v1
+	str	w1, [rp, #-12]
+	mov	w0, #0
+	umaal	w0, cy2, u0, v2
+	umaal	w2, cy0, u1, v0
+	umaal	w0, cy1, u1, v1
+	str	w2, [rp, #-8]
+	umaal	cy1, cy2, u1, v2
+	adds	w0, w0, cy0
+	str	w0, [rp, #-4]
+	adcs	w1, cy1, #0
+	str	w1, [rp, #0]
+	adc	r0, cy2, #0
+
+	pop	{ r4-r11, pc }
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/dive_1.asm b/third_party/gmp/mpn/arm/v6/dive_1.asm
new file mode 100644
index 0000000..92de814
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/dive_1.asm

@@ -0,0 +1,149 @@
+dnl  ARM v6 mpn_divexact_1
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb       cycles/limb
+C               norm    unorm    modexact_1c_odd
+C StrongARM	 -	 -
+C XScale	 -	 -
+C Cortex-A7	 ?	 ?
+C Cortex-A8	 ?	 ?
+C Cortex-A9	 9	10		 9
+C Cortex-A15	 7	 7		 7
+
+C Architecture requirements:
+C v5	-
+C v5t	clz
+C v5te	-
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`d',  `r3')
+
+define(`cy',  `r7')
+define(`cnt', `r6')
+define(`tnc', `r10')
+
+ASM_START()
+PROLOGUE(mpn_divexact_1)
+	push	{r4,r5,r6,r7,r8,r9}
+
+	tst	d, #1
+
+	rsb	r4, d, #0
+	and	r4, r4, d
+	clz	r4, r4
+	rsb	cnt, r4, #31		C count_trailing_zeros
+	mov	d, d, lsr cnt
+
+C binvert limb
+	LEA(	r4, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]
+	mul	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, lsl #1
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, lsl #1	C r4 = inverse
+
+	ldr	r5, [up], #4		C up[0]
+	mov	cy, #0
+	rsb	r8, r4, #0		C r8 = -inverse
+	beq	L(unnorm)
+
+L(norm):
+	subs	n, n, #1
+	mul	r5, r5, r4
+	beq	L(end)
+
+	ALIGN(16)
+L(top):	ldr	r9, [up], #4
+	mov	r12, #0
+	str	r5, [rp], #4
+	umaal	r12, cy, r5, d
+	mul	r5, r9, r4
+	mla	r5, cy, r8, r5
+	subs	n, n, #1
+	bne	L(top)
+
+L(end):	str	r5, [rp]
+	pop	{r4,r5,r6,r7,r8,r9}
+	bx	r14
+
+L(unnorm):
+	push	{r10,r11}
+	rsb	tnc, cnt, #32
+	mov	r11, r5, lsr cnt
+	subs	n, n, #1
+	beq	L(edx)
+
+	ldr	r12, [up], #4
+	orr	r9, r11, r12, lsl tnc
+	mov	r11, r12, lsr cnt
+	mul	r5, r9, r4
+	subs	n, n, #1
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r11, r12, lsl tnc
+	mov	r11, r12, lsr cnt
+	mov	r12, #0
+	str	r5, [rp], #4
+	umaal	r12, cy, r5, d
+	mul	r5, r9, r4
+	mla	r5, cy, r8, r5
+	subs	n, n, #1
+	bne	L(tpu)
+
+L(edu):	str	r5, [rp], #4
+	mov	r12, #0
+	umaal	r12, cy, r5, d
+	mul	r5, r11, r4
+	mla	r5, cy, r8, r5
+	str	r5, [rp]
+	pop	{r10,r11}
+	pop	{r4,r5,r6,r7,r8,r9}
+	bx	r14
+
+L(edx):	mul	r5, r11, r4
+	str	r5, [rp]
+	pop	{r10,r11}
+	pop	{r4,r5,r6,r7,r8,r9}
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/gmp-mparam.h b/third_party/gmp/mpn/arm/v6/gmp-mparam.h
new file mode 100644
index 0000000..35a7c55
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/gmp-mparam.h

@@ -0,0 +1,187 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 700 MHz ARM11 (raspberry pi) */
+/* FFT tuning limit = 8,088,775 */
+/* Generated by tuneup.c, 2019-10-23, gcc 8.3 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     19
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIV_QR_1N_PI1_METHOD                 1  /* 71.61% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           38
+
+#define DIV_1_VS_MUL_1_PERCENT             251
+
+#define MUL_TOOM22_THRESHOLD                38
+#define MUL_TOOM33_THRESHOLD               134
+#define MUL_TOOM44_THRESHOLD               512
+#define MUL_TOOM6H_THRESHOLD                 0  /* always */
+#define MUL_TOOM8H_THRESHOLD               620
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     209
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     625
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     209
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     211
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     300
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 55
+#define SQR_TOOM3_THRESHOLD                200
+#define SQR_TOOM4_THRESHOLD                470
+#define SQR_TOOM6_THRESHOLD                614
+#define SQR_TOOM8_THRESHOLD                882
+
+#define MULMID_TOOM42_THRESHOLD             62
+
+#define MULMOD_BNM1_THRESHOLD               23
+#define SQRMOD_BNM1_THRESHOLD               26
+
+#define MUL_FFT_MODF_THRESHOLD             565  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    565, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     15, 5}, {     31, 6}, {     28, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     21, 6}, {     43, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
+    {     99, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
+    {     95,10}, {    207,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271,11}, {    159,10}, \
+    {    351,11}, {    191,10}, {    399,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
+    {    607,11}, {    319,10}, {    639,11}, {    351,12}, \
+    {    191,11}, {    415,13}, {    127,12}, {    255,11}, \
+    {    575,12}, {    319,11}, {    671,12}, {    383,11}, \
+    {    799,12}, {    447,13}, {    255,12}, {    511,11}, \
+    {   1023,12}, {    703,13}, {    383,12}, {    895,14}, \
+    {    255,13}, {    511,12}, {   1151,13}, {    639,12}, \
+    {   1343,13}, {    767,12}, {   1599,13}, {    895,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 98
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             530  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    530, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     28, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     21, 6}, \
+    {     43, 7}, {     23, 6}, {     47, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     43, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 7}, {     55, 8}, \
+    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
+    {     83, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    167,10}, {     95, 9}, {    191,10}, {    111,11}, \
+    {     63,10}, {    143, 9}, {    287,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287,11}, {    159,10}, {    351,11}, \
+    {    191,10}, {    415,11}, {    223,12}, {    127,11}, \
+    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
+    {    319,10}, {    639,11}, {    351,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    415,13}, {    127,12}, \
+    {    255,11}, {    607,12}, {    319,11}, {    703,12}, \
+    {    383,11}, {    799,12}, {    447,11}, {    895,13}, \
+    {    255,12}, {    511,11}, {   1023,12}, {    703,13}, \
+    {    383,12}, {    895,14}, {    255,13}, {    511,12}, \
+    {   1151,13}, {    639,12}, {   1343,13}, {    767,12}, \
+    {   1599,13}, {    895,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 104
+#define SQR_FFT_THRESHOLD                 4416
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  51
+#define MULLO_MUL_N_THRESHOLD            11278
+#define SQRLO_BASECASE_THRESHOLD            10
+#define SQRLO_DC_THRESHOLD                  55
+#define SQRLO_SQR_THRESHOLD               8648
+
+#define DC_DIV_QR_THRESHOLD                 36
+#define DC_DIVAPPR_Q_THRESHOLD             146
+#define DC_BDIV_QR_THRESHOLD                46
+#define DC_BDIV_Q_THRESHOLD                160
+
+#define INV_MULMOD_BNM1_THRESHOLD           74
+#define INV_NEWTON_THRESHOLD               145
+#define INV_APPR_THRESHOLD                 147
+
+#define BINV_NEWTON_THRESHOLD              372
+#define REDC_1_TO_REDC_2_THRESHOLD           6
+#define REDC_2_TO_REDC_N_THRESHOLD         140
+
+#define MU_DIV_QR_THRESHOLD               2801
+#define MU_DIVAPPR_Q_THRESHOLD            2801
+#define MUPI_DIV_QR_THRESHOLD               79
+#define MU_BDIV_QR_THRESHOLD              2541
+#define MU_BDIV_Q_THRESHOLD               2764
+
+#define POWM_SEC_TABLE  3,20,139,734
+
+#define GET_STR_DC_THRESHOLD                27
+#define GET_STR_PRECOMPUTE_THRESHOLD        45
+#define SET_STR_DC_THRESHOLD               342
+#define SET_STR_PRECOMPUTE_THRESHOLD      1290
+
+#define FAC_DSC_THRESHOLD                  390
+#define FAC_ODD_THRESHOLD                  438
+
+#define MATRIX22_STRASSEN_THRESHOLD         25
+#define HGCD2_DIV1_METHOD                    5  /* 1.32% faster than 3 */
+#define HGCD_THRESHOLD                      82
+#define HGCD_APPR_THRESHOLD                 81
+#define HGCD_REDUCE_THRESHOLD             4633
+#define GCD_DC_THRESHOLD                   345
+#define GCDEXT_DC_THRESHOLD                268
+#define JACOBI_BASE_METHOD                   1  /* 3.30% faster than 2 */
+
+/* Tuneup completed successfully, took 45018 seconds */

diff --git a/third_party/gmp/mpn/arm/v6/mode1o.asm b/third_party/gmp/mpn/arm/v6/mode1o.asm
new file mode 100644
index 0000000..a2f77a6
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/mode1o.asm

@@ -0,0 +1,95 @@
+dnl  ARM v6 mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 9
+C Cortex-A15	 7
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	smulbb
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+	.protected	binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+	stmfd	sp!, {r4, r5, r6, r7}
+
+	LEA(	r4, binvert_limb_table)
+
+	ldr	r6, [up], #4		C up[0]
+
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]
+	smulbb	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, asl #1
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, asl #1	C r4 = inverse
+
+	subs	n, n, #1
+	sub	r6, r6, cy
+	mul	r6, r6, r4
+	beq	L(end)
+
+	rsb	r5, r4, #0		C r5 = -inverse
+
+L(top):	ldr	r7, [up], #4
+	mov	r12, #0
+	umaal	r12, cy, r6, d
+	mul	r6, r7, r4
+	mla	r6, cy, r5, r6
+	subs	n, n, #1
+	bne	L(top)
+
+L(end):	mov	r12, #0
+	umaal	r12, cy, r6, d
+	mov	r0, cy
+
+	ldmfd	sp!, {r4, r5, r6, r7}
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/mul_1.asm b/third_party/gmp/mpn/arm/v6/mul_1.asm
new file mode 100644
index 0000000..3c6ef99
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/mul_1.asm

@@ -0,0 +1,115 @@
+dnl  ARM mpn_mul_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 6.4
+C Cortex-A7	 5.25
+C Cortex-A8	 7
+C Cortex-A9	 3.25
+C Cortex-A15	 4
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+	ands	r6, n, #3
+	mov	r12, #0
+	beq	L(fi0)
+	cmp	r6, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	ldr	r4, [up], #4
+	mov	r6, #0
+	ldr	r5, [up], #4
+	b	L(lo3)
+
+L(fi0):	ldr	r5, [up], #4
+	add	rp, rp, #4
+	mov	r7, #0
+	ldr	r4, [up], #4
+	b	L(lo0)
+
+L(fi1):	ldr	r4, [up], #4
+	mov	r6, #0
+	add	rp, rp, #8
+	subs	n, n, #1
+	beq	L(1)
+	ldr	r5, [up], #4
+	b	L(lo1)
+
+L(fi2):	ldr	r5, [up], #4
+	add	rp, rp, #12
+	mov	r7, #0
+	ldr	r4, [up], #4
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	mov	r6, #0
+	ldr	r5, [up], #4
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0
+	mov	r7, #0
+	ldr	r4, [up], #4
+	str	r6, [rp, #-8]
+L(lo0):	umaal	r7, r12, r5, v0
+	mov	r6, #0
+	ldr	r5, [up], #4
+	str	r7, [rp, #-4]
+L(lo3):	umaal	r6, r12, r4, v0
+	mov	r7, #0
+	ldr	r4, [up], #4
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	subs	n, n, #4
+	bhi	L(top)
+
+	mov	r6, #0
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
+	str	r6, [rp, #-8]
+	mov	r0, r12
+	ldmfd	sp!, { r4, r5, r6, r7 }
+	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/mul_2.asm b/third_party/gmp/mpn/arm/v6/mul_2.asm
new file mode 100644
index 0000000..edd27f3
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/mul_2.asm

@@ -0,0 +1,135 @@
+dnl  ARM mpn_mul_2.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 5.25
+C Cortex-A5	 3.63
+C Cortex-A7	 3.15
+C Cortex-A8	 5.0
+C Cortex-A9	 2.25
+C Cortex-A15	 2.5
+C Cortex-A17	 2.13
+C Cortex-A53	 3.5
+
+C TODO
+C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
+C    and possible speedups to 2.0 c/l.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_mul_2)
+	push	{ r4, r5, r6, r7, r8, r9 }
+
+	ldm	vp, { v0, v1 }
+	mov	cya, #0
+	mov	cyb, #0
+
+	tst	n, #1
+	beq	L(evn)
+L(odd):	mov	r5, #0
+	ldr	u0, [up, #0]
+	mov	r4, #0
+	tst	n, #2
+	beq	L(fi1)
+L(fi3):	sub	up, up, #12
+	sub	rp, rp, #16
+	b	L(lo3)
+L(fi1):	sub	n, n, #1
+	sub	up, up, #4
+	sub	rp, rp, #8
+	b	L(lo1)
+L(evn):	mov	r4, #0
+	ldr	u1, [up, #0]
+	mov	r5, #0
+	tst	n, #2
+	bne	L(fi2)
+L(fi0):	sub	up, up, #8
+	sub	rp, rp, #12
+	b	L(lo0)
+L(fi2):	subs	n, n, #2
+	sub	rp, rp, #4
+	bls	L(end)
+
+	ALIGN(16)
+L(top):	ldr	u0, [up, #4]
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #4]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(lo1):	ldr	u1, [up, #8]
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #8]
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+L(lo0):	ldr	u0, [up, #12]
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #12]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(lo3):	ldr	u1, [up, #16]!
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #16]!
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+	subs	n, n, #4
+	bhi	L(top)
+
+L(end):	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #4]
+	umaal	r5, cya, u0, v0
+	umaal	cya, cyb, u0, v1
+	str	r5, [rp, #8]
+	str	cya, [rp, #12]
+	mov	r0, cyb
+
+	pop	{ r4, r5, r6, r7, r8, r9 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/popham.asm b/third_party/gmp/mpn/arm/v6/popham.asm
new file mode 100644
index 0000000..c254368
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/popham.asm

@@ -0,0 +1,139 @@
+dnl  ARM mpn_popcount and mpn_hamdist.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		     popcount	      hamdist
+C		    cycles/limb	    cycles/limb
+C StrongARM		 -
+C XScale		 -
+C Cortex-A7		 ?
+C Cortex-A8		 ?
+C Cortex-A9		 8.94		 9.47
+C Cortex-A15		 5.67		 6.44
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	usada8
+C v6t2	-
+C v7a	-
+
+ifdef(`OPERATION_popcount',`
+  define(`func',`mpn_popcount')
+  define(`ap',		`r0')
+  define(`n',		`r1')
+  define(`a0',		`r2')
+  define(`a1',		`r3')
+  define(`s',		`r5')
+  define(`b_01010101',	`r6')
+  define(`b_00110011',	`r7')
+  define(`b_00001111',	`r8')
+  define(`zero',	`r9')
+  define(`POPC',	`$1')
+  define(`HAMD',	`dnl')
+')
+ifdef(`OPERATION_hamdist',`
+  define(`func',`mpn_hamdist')
+  define(`ap',		`r0')
+  define(`bp',		`r1')
+  define(`n',		`r2')
+  define(`a0',		`r6')
+  define(`a1',		`r7')
+  define(`b0',		`r4')
+  define(`b1',		`r5')
+  define(`s',		`r11')
+  define(`b_01010101',	`r8')
+  define(`b_00110011',	`r9')
+  define(`b_00001111',	`r10')
+  define(`zero',	`r3')
+  define(`POPC',	`dnl')
+  define(`HAMD',	`$1')
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+ASM_START()
+PROLOGUE(func)
+POPC(`	push	{ r4-r9 }	')
+HAMD(`	push	{ r4-r11 }	')
+
+	ldr	b_01010101, =0x55555555
+	mov	r12, #0
+	ldr	b_00110011, =0x33333333
+	mov	zero, #0
+	ldr	b_00001111, =0x0f0f0f0f
+
+	tst	n, #1
+	beq	L(evn)
+
+L(odd):	ldr	a1, [ap], #4		C 1 x 32 1-bit accumulators, 0-1
+HAMD(`	ldr	b1, [bp], #4	')	C 1 x 32 1-bit accumulators, 0-1
+HAMD(`	eor	a1, a1, b1	')
+	and	r4, b_01010101, a1, lsr #1
+	sub	a1, a1, r4
+	and	r4, a1, b_00110011
+	bic	r5, a1, b_00110011
+	add	r5, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
+	subs	n, n, #1
+	b	L(mid)
+
+L(evn):	mov	s, #0
+
+L(top):	ldrd	a0, a1, [ap], #8	C 2 x 32 1-bit accumulators, 0-1
+HAMD(`	ldrd	b0, b1, [bp], #8')
+HAMD(`	eor	a0, a0, b0	')
+HAMD(`	eor	a1, a1, b1	')
+	subs	n, n, #2
+	usada8	r12, s, zero, r12
+	and	r4, b_01010101, a0, lsr #1
+	sub	a0, a0, r4
+	and	r4, b_01010101, a1, lsr #1
+	sub	a1, a1, r4
+	and	r4, a0, b_00110011
+	bic	r5, a0, b_00110011
+	add	a0, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
+	and	r4, a1, b_00110011
+	bic	r5, a1, b_00110011
+	add	a1, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
+	add	r5, a0, a1		C 8 4-bit accumulators, 0-8
+L(mid):	and	r4, r5, b_00001111
+	bic	r5, r5, b_00001111
+	add	s, r4, r5, lsr #4	C 4 8-bit accumulators
+	bne	L(top)
+
+	usada8	r0, s, zero, r12
+POPC(`	pop	{ r4-r9 }	')
+HAMD(`	pop	{ r4-r11 }	')
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/sqr_basecase.asm b/third_party/gmp/mpn/arm/v6/sqr_basecase.asm
new file mode 100644
index 0000000..0fc4f13
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/sqr_basecase.asm

@@ -0,0 +1,544 @@
+dnl  ARM v6 mpn_sqr_basecase.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2015 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Code structure:
+C
+C
+C        m_2(0m4)        m_2(2m4)        m_2(1m4)        m_2(3m4)
+C           |               |               |               |
+C           |               |               |               |
+C           |               |               |               |
+C          \|/             \|/             \|/             \|/
+C              ____________                   ____________
+C             /            \                 /            \
+C            \|/            \               \|/            \
+C         am_2(3m4)       am_2(1m4)       am_2(0m4)       am_2(2m4)
+C            \            /|\                \            /|\
+C             \____________/                  \____________/
+C                       \                        /
+C                        \                      /
+C                         \                    /
+C                         cor3             cor2
+C                            \              /
+C                             \            /
+C                            sqr_diag_addlsh1
+
+C TODO
+C  * Align more labels.
+C  * Further tweak counter and updates in outer loops.  (This could save
+C    perhaps 5n cycles).
+C  * Avoid sub-with-lsl in outer loops.  We could keep n up-shifted, then
+C    initialise loop counter i with a right shift.
+C  * Try to use fewer register.  Perhaps coalesce r9 branch target and n_saved.
+C    (This could save 2-3 cycles for n > 4.)
+C  * Optimise sqr_diag_addlsh1 loop.  The current code uses old-style carry
+C    propagation.
+C  * Stop loops earlier suppressing writes of upper-most rp[] values.
+C  * The addmul_2 loops here runs well on all cores, but mul_2 runs poorly
+C    particularly on Cortex-A8.
+
+
+define(`rp',      r0)
+define(`up',      r1)
+define(`n',       r2)
+
+define(`v0',      r3)
+define(`v1',      r6)
+define(`i',       r8)
+define(`n_saved', r14)
+define(`cya',     r11)
+define(`cyb',     r12)
+define(`u0',      r7)
+define(`u1',      r9)
+
+ASM_START()
+PROLOGUE(mpn_sqr_basecase)
+	and	r12, n, #3
+	cmp	n, #4
+	addgt	r12, r12, #4
+	add	pc, pc, r12, lsl #2
+	nop
+	b	L(4)
+	b	L(1)
+	b	L(2)
+	b	L(3)
+	b	L(0m4)
+	b	L(1m4)
+	b	L(2m4)
+	b	L(3m4)
+
+
+L(1m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_2m4)-.-8
+	ldm	up, {v0,v1,u0}
+	sub	up, up, #4
+	mov	cyb, #0
+	mov	r5, #0
+	umull	r4, cya, v1, v0
+	str	r4, [rp], #-12
+	mov	r4, #0
+	b	L(ko0)
+
+L(3m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_0m4)-.-8
+	ldm	up, {v0,v1,u0}
+	add	up, up, #4
+	mov	cyb, #0
+	mov	r5, #0
+	umull	r4, cya, v1, v0
+	str	r4, [rp], #-4
+	mov	r4, #0
+	b	L(ko2)
+
+L(2m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_3m4)-.-8
+	ldm	up, {v0,v1,u1}
+	mov	cyb, #0
+	mov	r4, #0
+	umull	r5, cya, v1, v0
+	str	r5, [rp], #-8
+	mov	r5, #0
+	b	L(ko1)
+
+L(0m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_1m4)-.-8
+	ldm	up, {v0,v1,u1}
+	mov	cyb, #0
+	mov	r4, #0
+	add	up, up, #8
+	umull	r5, cya, v1, v0
+	str	r5, [rp, #0]
+	mov	r5, #0
+
+L(top):	ldr	u0, [up, #4]
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #4]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(ko2):	ldr	u1, [up, #8]
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #8]
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+L(ko1):	ldr	u0, [up, #12]
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #12]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(ko0):	ldr	u1, [up, #16]!
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #16]!
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+	subs	i, i, #4
+	bhi	L(top)
+
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #4]
+	umaal	r5, cya, u0, v0
+	umaal	cya, cyb, u0, v1
+	str	r5, [rp, #8]
+	str	cya, [rp, #12]
+	str	cyb, [rp, #16]
+
+	add	up, up, #4
+	sub	n, n, #1
+	add	rp, rp, #8
+	bx	r10
+
+L(evnloop):
+	subs	i, n, #6
+	sub	n, n, #2
+	blt	L(cor2)
+	ldm	up, {v0,v1,u1}
+	add	up, up, #8
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r4, [rp, #-4]
+	umaal	r4, cya, v1, v0
+	str	r4, [rp, #-4]
+	ldr	r4, [rp, #0]
+
+	ALIGN(16)
+L(ua2):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua2)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_0m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #8
+
+	sub	i, n, #4
+	sub	n, n, #2
+	ldm	up, {v0,v1,u1}
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r4, [rp, #4]
+	umaal	r4, cya, v1, v0
+	str	r4, [rp, #4]
+	ldr	r4, [rp, #8]
+	b	L(lo0)
+
+	ALIGN(16)
+L(ua0):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+L(lo0):	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua0)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_2m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #16
+	b	L(evnloop)
+
+
+L(oddloop):
+	sub	i, n, #5
+	sub	n, n, #2
+	ldm	up, {v0,v1,u0}
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r5, [rp, #0]
+	umaal	r5, cya, v1, v0
+	str	r5, [rp, #0]
+	ldr	r5, [rp, #4]
+	add	up, up, #4
+	b	L(lo1)
+
+	ALIGN(16)
+L(ua1):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+L(lo1):	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua1)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_3m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #4
+
+	subs	i, n, #3
+	beq	L(cor3)
+	sub	n, n, #2
+	ldm	up, {v0,v1,u0}
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r5, [rp, #8]
+	sub	up, up, #4
+	umaal	r5, cya, v1, v0
+	str	r5, [rp, #8]
+	ldr	r5, [rp, #12]
+	b	L(lo3)
+
+	ALIGN(16)
+L(ua3):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+L(lo3):	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua3)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_1m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #12
+	b	L(oddloop)
+
+
+L(cor3):ldm	up, {v0,v1,u0}
+	ldr	r5, [rp, #8]
+	mov	cya, #0
+	mov	cyb, #0
+	umaal	r5, cya, v1, v0
+	str	r5, [rp, #8]
+	ldr	r5, [rp, #12]
+	ldr	r4, [rp, #16]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #12]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #16]
+	str	cya, [rp, #20]
+	str	cyb, [rp, #24]
+	add	up, up, #16
+	mov	cya, cyb
+	adds	rp, rp, #36		C clear cy
+	mov	cyb, #0
+	umaal	cya, cyb, u1, u0
+	b	L(sqr_diag_addlsh1)
+
+L(cor2):
+	ldm	up!, {v0,v1,u0}
+	mov	r4, cya
+	mov	r5, cyb
+	mov	cya, #0
+	umaal	r4, cya, v1, v0
+	mov	cyb, #0
+	umaal	r5, cya, u0, v0
+	strd	r4, r5, [rp, #-4]
+	umaal	cya, cyb, u0, v1
+	add	rp, rp, #16
+C	b	L(sqr_diag_addlsh1)
+
+
+define(`w0',  r6)
+define(`w1',  r7)
+define(`w2',  r8)
+define(`rbx', r9)
+
+L(sqr_diag_addlsh1):
+	str	cya, [rp, #-12]
+	str	cyb, [rp, #-8]
+	sub	n, n_saved, #1
+	sub	up, up, n_saved, lsl #2
+	sub	rp, rp, n_saved, lsl #3
+	ldr	r3, [up], #4
+	umull	w1, r5, r3, r3
+	mov	w2, #0
+	mov	r10, #0
+C	cmn	r0, #0			C clear cy (already clear)
+	b	L(lm)
+
+L(tsd):	adds	w0, w0, rbx
+	adcs	w1, w1, r4
+	str	w0, [rp, #0]
+L(lm):	ldr	w0, [rp, #4]
+	str	w1, [rp, #4]
+	ldr	w1, [rp, #8]!
+	add	rbx, r5, w2
+	adcs	w0, w0, w0
+	ldr	r3, [up], #4
+	adcs	w1, w1, w1
+	adc	w2, r10, r10
+	umull	r4, r5, r3, r3
+	subs	n, n, #1
+	bne	L(tsd)
+
+	adds	w0, w0, rbx
+	adcs	w1, w1, r4
+	adc	w2, r5, w2
+	stm	rp, {w0,w1,w2}
+
+	pop	{r4-r11, pc}
+
+
+C Straight line code for n <= 4
+
+L(1):	ldr	r3, [up, #0]
+	umull	r1, r2, r3, r3
+	stm	rp, {r1,r2}
+	bx	r14
+
+L(2):	push	{r4-r5}
+	ldm	up, {r5,r12}
+	umull	r1, r2, r5, r5
+	umull	r3, r4, r12, r12
+	umull	r5, r12, r5, r12
+	adds	r5, r5, r5
+	adcs	r12, r12, r12
+	adc	r4, r4, #0
+	adds	r2, r2, r5
+	adcs	r3, r3, r12
+	adc	r4, r4, #0
+	stm	rp, {r1,r2,r3,r4}
+	pop	{r4-r5}
+	bx	r14
+
+L(3):	push	{r4-r11}
+	ldm	up, {r7,r8,r9}
+	umull	r1, r2, r7, r7
+	umull	r3, r4, r8, r8
+	umull	r5, r6, r9, r9
+	umull	r10, r11, r7, r8
+	mov	r12, #0
+	umlal	r11, r12, r7, r9
+	mov	r7, #0
+	umlal	r12, r7, r8, r9
+	adds	r10, r10, r10
+	adcs	r11, r11, r11
+	adcs	r12, r12, r12
+	adcs	r7, r7, r7
+	adc	r6, r6, #0
+	adds	r2, r2, r10
+	adcs	r3, r3, r11
+	adcs	r4, r4, r12
+	adcs	r5, r5, r7
+	adc	r6, r6, #0
+	stm	rp, {r1,r2,r3,r4,r5,r6}
+	pop	{r4-r11}
+	bx	r14
+
+L(4):	push	{r4-r11, r14}
+	ldm	up, {r9,r10,r11,r12}
+	umull	r1, r2, r9, r9
+	umull	r3, r4, r10, r10
+	umull	r5, r6, r11, r11
+	umull	r7, r8, r12, r12
+	stm	rp, {r1,r2,r3,r4,r5,r6,r7}
+	umull	r1, r2, r9, r10
+	mov	r3, #0
+	umlal	r2, r3, r9, r11
+	mov	r4, #0
+	umlal	r3, r4, r9, r12
+	mov	r5, #0
+	umlal	r3, r5, r10, r11
+	umaal	r4, r5, r10, r12
+	mov	r6, #0
+	umlal	r5, r6, r11, r12
+	adds	r1, r1, r1
+	adcs	r2, r2, r2
+	adcs	r3, r3, r3
+	adcs	r4, r4, r4
+	adcs	r5, r5, r5
+	adcs	r6, r6, r6
+	add	rp, rp, #4
+	adc	r7, r8, #0
+	ldm	rp, {r8,r9,r10,r11,r12,r14}
+	adds	r1, r1, r8
+	adcs	r2, r2, r9
+	adcs	r3, r3, r10
+	adcs	r4, r4, r11
+	adcs	r5, r5, r12
+	adcs	r6, r6, r14
+	adc	r7, r7, #0
+	stm	rp, {r1,r2,r3,r4,r5,r6,r7}
+	pop	{r4-r11, pc}
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6/submul_1.asm b/third_party/gmp/mpn/arm/v6/submul_1.asm
new file mode 100644
index 0000000..8a21733
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6/submul_1.asm

@@ -0,0 +1,125 @@
+dnl  ARM mpn_submul_1.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.75
+C Cortex-A15	 4.0
+
+C This loop complements U on the fly,
+C   U' = B^n - 1 - U
+C and then uses that
+C   R - U*v = R + U'*v + v - B^n v
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+	ands	r6, n, #3
+	mov	r12, v0
+	beq	L(fi0)
+	cmp	r6, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	ldr	r4, [up], #12
+	mvn	r4, r4
+	ldr	r6, [rp, #0]
+	ldr	r5, [up, #-8]
+	b	L(lo3)
+
+L(fi0):	ldr	r5, [up], #16
+	mvn	r5, r5
+	ldr	r7, [rp], #4
+	ldr	r4, [up, #-12]
+	b	L(lo0)
+
+L(fi1):	ldr	r4, [up], #4
+	mvn	r4, r4
+	ldr	r6, [rp], #8
+	subs	n, n, #1
+	beq	L(1)
+	ldr	r5, [up]
+	b	L(lo1)
+
+L(fi2):	ldr	r5, [up], #8
+	mvn	r5, r5
+	ldr	r7, [rp], #12
+	ldr	r4, [up, #-4]
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	ldr	r6, [rp, #-8]
+	ldr	r5, [up]
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0
+	add	up, up, #16
+	mvn	r5, r5
+	ldr	r7, [rp, #-4]
+	ldr	r4, [up, #-12]
+	str	r6, [rp, #-8]
+L(lo0):	umaal	r7, r12, r5, v0
+	mvn	r4, r4
+	ldr	r6, [rp, #0]
+	ldr	r5, [up, #-8]
+	str	r7, [rp, #-4]
+L(lo3):	umaal	r6, r12, r4, v0
+	mvn	r5, r5
+	ldr	r7, [rp, #4]
+	ldr	r4, [up, #-4]
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	mvn	r4, r4
+	subs	n, n, #4
+	bhi	L(top)
+
+	ldr	r6, [rp, #-8]
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
+	str	r6, [rp, #-8]
+	sub	r0, v0, r12
+	ldmfd	sp!, { r4, r5, r6, r7 }
+	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6t2/divrem_1.asm b/third_party/gmp/mpn/arm/v6t2/divrem_1.asm
new file mode 100644
index 0000000..be24615
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6t2/divrem_1.asm

@@ -0,0 +1,212 @@
+dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		norm	unorm	frac
+C StrongARM	 -	 -	 -
+C XScale	 -	 -	 -
+C Cortex-A7	 ?	 ?	 ?
+C Cortex-A8	 ?	 ?	 ?
+C Cortex-A9	13	14	13
+C Cortex-A15	11.4	11.8	11.1
+
+C TODO
+C  * Optimise inner-loops better, they could likely run a cycle or two faster.
+C  * Decrease register usage, streamline non-loop code.
+
+define(`qp_arg',  `r0')
+define(`fn',      `r1')
+define(`up_arg',  `r2')
+define(`n_arg',   `r3')
+define(`d_arg',   `0')
+define(`dinv_arg',`4')
+define(`cnt_arg', `8')
+
+define(`n',       `r9')
+define(`qp',      `r5')
+define(`up',      `r6')
+define(`cnt',     `r7')
+define(`tnc',     `r10')
+define(`dinv',    `r0')
+define(`d',       `r4')
+
+ASM_START()
+PROLOGUE(mpn_preinv_divrem_1)
+	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	ldr	d,    [sp, #9*4+d_arg]
+	ldr	cnt,  [sp, #9*4+cnt_arg]
+	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
+	sub	n, r3, #1
+	add	r3, r1, n
+	cmp	d, #0
+	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
+	add	up, up_arg, n, lsl #2	C put up at U[] end
+	ldr	dinv, [sp, #9*4+dinv_arg]
+	blt	L(nent)
+	b	L(uent)
+EPILOGUE()
+
+PROLOGUE(mpn_divrem_1)
+	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	sub	n, r3, #1
+	ldr	d, [sp, #9*4+d_arg]	C d
+	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
+	add	r3, r1, n
+	cmp	d, #0
+	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
+	add	up, up_arg, n, lsl #2	C put up at U[] end
+	blt	L(normalised)
+
+L(unnorm):
+	clz	cnt, d
+	mov	r0, d, lsl cnt		C pass d << cnt
+	bl	mpn_invert_limb
+L(uent):
+	mov	d, d, lsl cnt		C d <<= cnt
+	cmp	n, #0
+	mov	r1, #0			C r
+	blt	L(frac)
+
+	ldr	r11, [up, #0]
+
+	rsb	tnc, cnt, #32
+	mov	r1, r11, lsr tnc
+	mov	r11, r11, lsl cnt
+	beq	L(uend)
+
+	ldr	r3, [up, #-4]!
+	orr	r2, r11, r3, lsr tnc
+	b	L(mid)
+
+L(utop):
+	mls	r1, d, r8, r11
+	mov	r11, r3, lsl cnt
+	ldr	r3, [up, #-4]!
+	cmp	r1, r2
+	addhi	r1, r1, d
+	subhi	r8, r8, #1
+	orr	r2, r11, r3, lsr tnc
+	cmp	r1, d
+	bcs	L(ufx)
+L(uok):	str	r8, [qp], #-4
+L(mid):	add	r8, r1, #1
+	mov	r11, r2
+	umlal	r2, r8, r1, dinv
+	subs	n, n, #1
+	bne	L(utop)
+
+	mls	r1, d, r8, r11
+	mov	r11, r3, lsl cnt
+	cmp	r1, r2
+	addhi	r1, r1, d
+	subhi	r8, r8, #1
+	cmp	r1, d
+	rsbcs	r1, d, r1
+	addcs	r8, r8, #1
+	str	r8, [qp], #-4
+
+L(uend):add	r8, r1, #1
+	mov	r2, r11
+	umlal	r2, r8, r1, dinv
+	mls	r1, d, r8, r11
+	cmp	r1, r2
+	addhi	r1, r1, d
+	subhi	r8, r8, #1
+	cmp	r1, d
+	rsbcs	r1, d, r1
+	addcs	r8, r8, #1
+	str	r8, [qp], #-4
+L(frac):
+	ldr	r2, [sp, #9*4+d_arg]	C fn
+	cmp	r2, #0
+	beq	L(fend)
+
+L(ftop):mov	r6, #0
+	add	r3, r1, #1
+	umlal	r6, r3, r1, dinv
+	mov	r8, #0
+	mls	r1, d, r3, r8
+	cmp	r1, r6
+	addhi	r1, r1, d
+	subhi	r3, r3, #1
+	subs	r2, r2, #1
+	str	r3, [qp], #-4
+	bne	L(ftop)
+
+L(fend):mov	r11, r1, lsr cnt
+L(rtn):	mov	r0, r11
+	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+
+L(normalised):
+	mov	r0, d
+	bl	mpn_invert_limb
+L(nent):
+	cmp	n, #0
+	mov	r11, #0			C r
+	blt	L(nend)
+
+	ldr	r11, [up, #0]
+	cmp	r11, d
+	movlo	r2, #0			C hi q limb
+	movhs	r2, #1			C hi q limb
+	subhs	r11, r11, d
+
+	str	r2, [qp], #-4
+	cmp	n, #0
+	beq	L(nend)
+
+L(ntop):ldr	r1, [up, #-4]!
+	add	r12, r11, #1
+	umlal	r1, r12, r11, dinv
+	ldr	r3, [up, #0]
+	mls	r11, d, r12, r3
+	cmp	r11, r1
+	addhi	r11, r11, d
+	subhi	r12, r12, #1
+	cmp	d, r11
+	bls	L(nfx)
+L(nok):	str	r12, [qp], #-4
+	subs	n, n, #1
+	bne	L(ntop)
+
+L(nend):mov	r1, r11			C r
+	mov	cnt, #0			C shift cnt
+	b	L(frac)
+
+L(nfx):	add	r12, r12, #1
+	rsb	r11, d, r11
+	b	L(nok)
+L(ufx):	rsb	r1, d, r1
+	add	r8, r8, #1
+	b	L(uok)
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6t2/gcd_11.asm b/third_party/gmp/mpn/arm/v6t2/gcd_11.asm
new file mode 100644
index 0000000..8a38351
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6t2/gcd_11.asm

@@ -0,0 +1,65 @@
+dnl  ARM v6t2 mpn_gcd_11.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2019 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 -
+C Cortex-A5	 5.2
+C Cortex-A7	 5.04
+C Cortex-A8	 3.59
+C Cortex-A9	 9.5
+C Cortex-A15	 3.2
+C Cortex-A17	 5.25
+C Cortex-A53	 3.57
+
+define(`u0',    `r0')
+define(`v0',    `r1')
+
+ASM_START()
+	TEXT
+	ALIGN(64)
+PROLOGUE(mpn_gcd_11)
+	subs	r3, u0, v0	C			0
+	beq	L(end)		C
+
+	ALIGN(16)
+L(top):	rbit	r12, r3		C			1,5
+	clz	r12, r12	C			2
+	rsbcc	r3, r3, #0	C v = abs(u-v), even	1
+	movcs	u0, v0		C u = min(u,v)		1
+	lsr	v0, r3, r12	C			3
+	subs	r3, u0, v0	C			4
+	bne	L(top)		C
+
+L(end):	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v6t2/gcd_22.asm b/third_party/gmp/mpn/arm/v6t2/gcd_22.asm
new file mode 100644
index 0000000..3b23808
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v6t2/gcd_22.asm

@@ -0,0 +1,113 @@
+dnl  ARM v6t2 mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 -
+C Cortex-A5	10.1
+C Cortex-A7	 9.1
+C Cortex-A8	 6.3
+C Cortex-A9	 ?
+C Cortex-A12	 7.7
+C Cortex-A15	 5.7
+C Cortex-A17	 ?
+C Cortex-A53	 7.0
+
+
+define(`gp',    `r0')
+
+define(`u1',    `r1')
+define(`u0',    `r2')
+define(`v1',    `r3')
+define(`v0',    `r4')
+
+define(`t0',    `r5')
+define(`t1',    `r6')
+define(`cnt',   `r7')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+	push	{ r4-r7 }
+
+	ldr	v0, [sp,#16]		C
+
+L(top):	subs	t0, u0, v0		C 0 7
+	beq	L(lowz)
+	sbcs	t1, u1, v1		C 1 8
+
+	rbit	cnt, t0			C 1
+
+	negcc	t0, t0
+	mvncc	t1, t1
+L(bck):	movcc	v0, u0
+	movcc	v1, u1
+
+	clz	cnt, cnt		C 2
+	rsb	r12, cnt, #32		C 3
+
+	lsr	u0, t0, cnt		C 3
+	lsl	r12, t1, r12		C 4
+	lsr	u1, t1, cnt		C 3
+	orr	u0, u0, r12		C 5
+
+	orrs	r12, u1, v1
+	bne	L(top)
+
+
+	str	r12, [gp,#4]		C high result limb <= 0
+
+	mov	r6, gp
+	mov	r0, u0			C pass 1st argument
+	mov	r1, v0			C pass 2nd argument
+	mov	r7, r14			C preserve link register
+	bl	mpn_gcd_11
+	str	r0, [r6,#0]
+	mov	r14, r7
+	pop	{ r4-r7 }
+	bx	r14
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	subs	t0, u1, v1
+	beq	L(end)
+	mov	t1, #0
+	rbit	cnt, t0			C 1
+	negcc	t0, t0
+	b	L(bck)
+
+L(end):	str	v0, [gp,#0]
+	str	v1, [gp,#4]
+	pop	{ r4-r7 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/addmul_1.asm b/third_party/gmp/mpn/arm/v7a/cora15/addmul_1.asm
new file mode 100644
index 0000000..c2277b3
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/addmul_1.asm

@@ -0,0 +1,145 @@
+dnl  ARM mpn_addmul_1 optimised for A15.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:     -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 6			3.25
+C Cortex-A15	 2			this
+
+C This code uses umlal for adding in the rp[] data, keeping the recurrency path
+C separate from any multiply instructions.  It performs well on A15, at umlal's
+C bandwidth.
+C
+C An A9 variant should perhaps stick to 3-way unrolling, and use ldm and stm
+C for all loads and stores.  Alternatively, it could do 2-way or 4-way, but
+C then alignment aware code will be necessary (adding O(1) bookkeeping
+C overhead).
+C
+C We don't use r12 due to ldrd and strd limitations.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`v0', `r3')
+
+define(`w0', `r10') define(`w1', `r11')
+define(`u0', `r8')  define(`u1', `r9')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	push	{ r4-r11 }
+
+	ands	r6, n, #3
+	sub	n, n, #3
+	beq	L(b00)
+	cmp	r6, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	mov	r6, #0
+	cmn	r13, #0			C carry clear
+	ldr	u1, [up], #-4
+	ldr	w1, [rp], #-4
+	mov	r7, #0
+	b	L(mid)
+
+L(b00):	ldrd	u0, u1, [up]
+	ldrd	w0, w1, [rp]
+	mov	r6, #0
+	umlal	w0, r6, u0, v0
+	cmn	r13, #0			C carry clear
+	mov	r7, #0
+	str	w0, [rp]
+	b	L(mid)
+
+L(b10):	ldrd	u0, u1, [up], #8
+	ldrd	w0, w1, [rp]
+	mov	r4, #0
+	umlal	w0, r4, u0, v0
+	cmn	r13, #0			C carry clear
+	mov	r5, #0
+	str	w0, [rp], #8
+	umlal	w1, r5, u1, v0
+	tst	n, n
+	bmi	L(end)
+	b	L(top)
+
+L(b01):	mov	r4, #0
+	ldr	u1, [up], #4
+	ldr	w1, [rp], #4
+	mov	r5, #0
+	umlal	w1, r5, u1, v0
+	tst	n, n
+	bmi	L(end)
+
+	ALIGN(16)
+L(top):	ldrd	u0, u1, [up, #0]
+	adcs	r4, r4, w1
+	ldrd	w0, w1, [rp, #0]
+	mov	r6, #0
+	umlal	w0, r6, u0, v0		C 1 2
+	adcs	r5, r5, w0
+	mov	r7, #0
+	strd	r4, r5, [rp, #-4]
+L(mid):	umlal	w1, r7, u1, v0		C 2 3
+	ldrd	u0, u1, [up, #8]
+	adcs	r6, r6, w1
+	ldrd	w0, w1, [rp, #8]
+	mov	r4, #0
+	umlal	w0, r4, u0, v0		C 3 4
+	adcs	r7, r7, w0
+	mov	r5, #0
+	strd	r6, r7, [rp, #4]
+	umlal	w1, r5, u1, v0		C 0 1
+	sub	n, n, #4
+	add	up, up, #16
+	add	rp, rp, #16
+	tst	n, n
+	bpl	L(top)
+
+L(end):	adcs	r4, r4, w1
+	str	r4, [rp, #-4]
+	adc	r0, r5, #0
+	pop	{ r4-r11 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/aors_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/aors_n.asm
new file mode 100644
index 0000000..dc3f839
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/aors_n.asm

@@ -0,0 +1,162 @@
+dnl  ARM mpn_add_n/mpn_sub_n optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:     -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.55			2.5
+C Cortex-A15	 1.27			this
+
+C This was a major improvement compared to the code we had before, but it might
+C not be the best 8-way code possible.  We've tried some permutations of auto-
+C increments and separate pointer updates, but they all ran at the same speed
+C on A15.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_add_n', `
+  define(`ADDSUBC',	adcs)
+  define(`IFADD',	`$1')
+  define(`SETCY',	`cmp	$1, #1')
+  define(`RETVAL',	`adc	r0, n, #0')
+  define(`RETVAL2',	`adc	r0, n, #1')
+  define(`func',	mpn_add_n)
+  define(`func_nc',	mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(`ADDSUBC',	sbcs)
+  define(`IFADD',	`')
+  define(`SETCY',	`rsbs	$1, $1, #0')
+  define(`RETVAL',	`sbc	r0, r0, r0
+			and	r0, r0, #1')
+  define(`RETVAL2',	`RETVAL')
+  define(`func',	mpn_sub_n)
+  define(`func_nc',	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+	ldr	r12, [sp]
+	b	L(ent)
+EPILOGUE()
+PROLOGUE(func)
+	mov	r12, #0
+L(ent):	push	{ r4-r9 }
+
+	ands	r6, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00)
+	cmp	r6, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	SETCY(	r12)
+	ADDSUBC	r9, r5, r7
+	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	str	r9, [rp], #-4
+	b	L(lo)
+
+L(b00):	ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	SETCY(	r12)
+	sub	rp, rp, #16
+	b	L(mid)
+
+L(b01):	ldr	r5, [up], #-4
+	ldr	r7, [vp], #-4
+	SETCY(	r12)
+	ADDSUBC	r9, r5, r7
+	str	r9, [rp], #-12
+	tst	n, n
+	beq	L(wd1)
+L(gt1):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	b	L(mid)
+
+L(b10):	ldrd	r4, r5, [up]
+	ldrd	r6, r7, [vp]
+	SETCY(	r12)
+	sub	rp, rp, #8
+	b	L(lo)
+
+	ALIGN(16)
+L(top):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	strd	r8, r9, [rp, #8]
+L(mid):	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	ldrd	r4, r5, [up, #16]
+	ldrd	r6, r7, [vp, #16]
+	strd	r8, r9, [rp, #16]
+	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	sub	n, n, #2
+	tst	n, n
+	bmi	L(dne)
+	ldrd	r4, r5, [up, #24]
+	ldrd	r6, r7, [vp, #24]
+	strd	r8, r9, [rp, #24]
+	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	ldrd	r4, r5, [up, #32]!
+	ldrd	r6, r7, [vp, #32]!
+	strd	r8, r9, [rp, #32]!
+L(lo):	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	tst	n, n
+	bne	L(top)
+
+L(end):	strd	r8, r9, [rp, #8]
+L(wd1):	RETVAL
+	pop	{ r4-r9 }
+	bx	r14
+L(dne):	strd	r8, r9, [rp, #24]
+	RETVAL2
+	pop	{ r4-r9 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/bdiv_q_1.asm b/third_party/gmp/mpn/arm/v7a/cora15/bdiv_q_1.asm
new file mode 100644
index 0000000..245b371
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/bdiv_q_1.asm

@@ -0,0 +1,36 @@
+dnl  ARM mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`arm/v7a/cora8/bdiv_q_1.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm
new file mode 100644
index 0000000..b9e5cd3
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm

@@ -0,0 +1,158 @@
+dnl  ARM mpn_cnd_add_n/mpn_cnd_sub_n optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:     -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.75			 3
+C Cortex-A15	 1.78			this
+
+C This code does not run as well as one could have hoped, since 1.5 c/l seems
+C realistic for this insn mix.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`cnd',`r0')
+define(`rp', `r1')
+define(`up', `r2')
+define(`vp', `r3')
+define(`n',  `r12')
+
+ifdef(`OPERATION_cnd_add_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`IFADD',	`$1')
+  define(`INITCY',      `cmn	r0, #0')
+  define(`RETVAL',	`adc	r0, n, #0')
+  define(`RETVAL2',	`adc	r0, n, #1')
+  define(`func',	mpn_cnd_add_n)
+  define(`func_nc',	mpn_add_nc)')
+ifdef(`OPERATION_cnd_sub_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`IFADD',	`')
+  define(`INITCY',      `cmp	r0, #0')
+  define(`RETVAL',	`sbc	r0, r0, r0
+			and	r0, r0, #1')
+  define(`RETVAL2',	`RETVAL')
+  define(`func',	mpn_cnd_sub_n)
+  define(`func_nc',	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	ldr	n, [sp]
+	push	{ r4-r9 }
+
+	cmp	cnd, #1
+	sbc	cnd, cnd, cnd		C conditionally set to 0xffffffff
+
+	ands	r6, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00)
+	cmp	r6, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	bic	r7, r7, cnd
+	ADDSUB	r9, r5, r7
+	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	str	r9, [rp], #-4
+	b	L(lo)
+
+L(b00):	ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	INITCY
+	sub	rp, rp, #16
+	b	L(mid)
+
+L(b01):	ldr	r5, [up], #-4
+	ldr	r7, [vp], #-4
+	bic	r7, r7, cnd
+	ADDSUB	r9, r5, r7
+	str	r9, [rp], #-12
+	tst	n, n
+	beq	L(wd1)
+L(gt1):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	b	L(mid)
+
+L(b10):	ldrd	r4, r5, [up]
+	ldrd	r6, r7, [vp]
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	INITCY
+	sub	rp, rp, #8
+	b	L(lo)
+
+	ALIGN(16)
+L(top):	ldrd	r6, r7, [vp, #8]
+	ldrd	r4, r5, [up, #8]
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	strd	r8, r9, [rp, #8]
+L(mid):	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	ldrd	r6, r7, [vp, #16]!
+	ldrd	r4, r5, [up, #16]!
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	sub	n, n, #1
+	strd	r8, r9, [rp, #16]!
+L(lo):	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	tst	n, n
+	bne	L(top)
+
+L(end):	strd	r8, r9, [rp, #8]
+L(wd1):	RETVAL
+	pop	{ r4-r9 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/com.asm b/third_party/gmp/mpn/arm/v7a/cora15/com.asm
new file mode 100644
index 0000000..a258afe
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/com.asm

@@ -0,0 +1,180 @@
+dnl  ARM mpn_com optimised for A15.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	2.5
+C Cortex-A15	1.0
+
+C This is great A15 core register code, but it is a bit large.
+C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
+define(`UNROLL', 4x2)		C alternatives: 4 4x2
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+	push	{ r4-r5,r8-r9 }
+
+ifelse(FEEDIN_VARIANT,0,`
+	ands	r12, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00a)
+	tst	r12, #1
+	beq	L(bx0)
+	ldr	r5, [up], #4
+	mvn	r9, r5
+	str	r9, [rp], #4
+	tst	r12, #2
+	beq	L(b00)
+L(bx0):	ldrd	r4, r5, [up, #0]
+	sub	rp, rp, #8
+	b	L(lo)
+L(b00):	tst	n, n
+	beq	L(wd1)
+L(b00a):ldrd	r4, r5, [up], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+')
+ifelse(FEEDIN_VARIANT,1,`
+	and	r12, n, #3
+	mov	n, n, lsr #2
+	tst	r12, #1
+	beq	L(bx0)
+	ldr	r5, [up], #4
+	mvn	r9, r5
+	str	r9, [rp], #4
+L(bx0):	tst	r12, #2
+	beq	L(b00)
+	ldrd	r4, r5, [up, #0]
+	sub	rp, rp, #8
+	b	L(lo)
+L(b00):	tst	n, n
+	beq	L(wd1)
+	ldrd	r4, r5, [up], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+')
+ifelse(FEEDIN_VARIANT,2,`
+	ands	r12, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00)
+	cmp	r12, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	ldr	r5, [up], #4
+	mvn	r9, r5
+	ldrd	r4, r5, [up, #0]
+	str	r9, [rp], #-4
+	b	L(lo)
+
+L(b00):	ldrd	r4, r5, [up], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+
+L(b01):	ldr	r5, [up], #-4
+	mvn	r9, r5
+	str	r9, [rp], #-12
+	tst	n, n
+	beq	L(wd1)
+L(gt1):	ldrd	r4, r5, [up, #8]
+	b	L(mid)
+
+L(b10):	ldrd	r4, r5, [up]
+	sub	rp, rp, #8
+	b	L(lo)
+')
+	ALIGN(16)
+ifelse(UNROLL,4,`
+L(top):	ldrd	r4, r5, [up, #8]
+	strd	r8, r9, [rp, #8]
+L(mid):	mvn	r8, r4
+	mvn	r9, r5
+	ldrd	r4, r5, [up, #16]!
+	strd	r8, r9, [rp, #16]!
+	sub	n, n, #1
+L(lo):	mvn	r8, r4
+	mvn	r9, r5
+	tst	n, n
+	bne	L(top)
+')
+ifelse(UNROLL,4x2,`
+L(top):	ldrd	r4, r5, [up, #8]
+	strd	r8, r9, [rp, #8]
+L(mid):	mvn	r8, r4
+	mvn	r9, r5
+	ldrd	r4, r5, [up, #16]
+	strd	r8, r9, [rp, #16]
+	mvn	r8, r4
+	mvn	r9, r5
+	sub	n, n, #2
+	tst	n, n
+	bmi	L(dne)
+	ldrd	r4, r5, [up, #24]
+	strd	r8, r9, [rp, #24]
+	mvn	r8, r4
+	mvn	r9, r5
+	ldrd	r4, r5, [up, #32]!
+	strd	r8, r9, [rp, #32]!
+L(lo):	mvn	r8, r4
+	mvn	r9, r5
+	tst	n, n
+	bne	L(top)
+')
+
+L(end):	strd	r8, r9, [rp, #8]
+L(wd1):	pop	{ r4-r5,r8-r9 }
+	bx	r14
+ifelse(UNROLL,4x2,`
+L(dne):	strd	r8, r9, [rp, #24]
+	pop	{ r4-r5,r8-r9 }
+	bx	r14
+')
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/gmp-mparam.h b/third_party/gmp/mpn/arm/v7a/cora15/gmp-mparam.h
new file mode 100644
index 0000000..409cbbb
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/gmp-mparam.h

@@ -0,0 +1,212 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 2000 MHz Cortex-A15 with Neon (in spite of file position) */
+/* FFT tuning limit = 50,736,668 */
+/* Generated by tuneup.c, 2019-10-22, gcc 5.4 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1  /* 49.14% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           17
+
+#define DIV_1_VS_MUL_1_PERCENT             267
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD               114
+#define MUL_TOOM44_THRESHOLD               178
+#define MUL_TOOM6H_THRESHOLD               238
+#define MUL_TOOM8H_THRESHOLD               597
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     113
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     115
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     154
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 38
+#define SQR_TOOM3_THRESHOLD                126
+#define SQR_TOOM4_THRESHOLD                336
+#define SQR_TOOM6_THRESHOLD                446
+#define SQR_TOOM8_THRESHOLD                650
+
+#define MULMID_TOOM42_THRESHOLD             52
+
+#define MULMOD_BNM1_THRESHOLD               23
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             575  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    575, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     19, 6}, \
+    {     39, 7}, {     25, 6}, {     51, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     51, 8}, {     27, 9}, {     15, 8}, \
+    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
+    {     55,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     71, 8}, {    143, 9}, \
+    {     87,10}, {     47, 9}, {    111,11}, {     31,10}, \
+    {     63, 9}, {    143,10}, {     79, 9}, {    159,10}, \
+    {     95,11}, {     63,10}, {    143, 9}, {    287,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287,11}, {    159,10}, {    335, 9}, \
+    {    671,10}, {    367, 9}, {    735,11}, {    191,10}, \
+    {    383, 9}, {    799,10}, {    415,11}, {    223,12}, \
+    {    127,10}, {    543,11}, {    287,10}, {    575,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087,11}, {    575,12}, {    319,11}, \
+    {    671,10}, {   1343,11}, {    735,12}, {    383,11}, \
+    {    831,12}, {    447,11}, {    959,13}, {    255,12}, \
+    {    511,11}, {   1087,12}, {    575,11}, {   1151,12}, \
+    {    639,11}, {   1343,12}, {    703,13}, {    383,12}, \
+    {    767,11}, {   1599,12}, {    895,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    639,12}, {   1407,13}, \
+    {    767,12}, {   1599,13}, {    895,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
+    {   1279,12}, {   2559,13}, {   1407,14}, {    767,13}, \
+    {   1535,12}, {   3135,13}, {   1663,15}, {    511,14}, \
+    {   1023,13}, {   2303,14}, {   1279,13}, {   2559,12}, \
+    {   5119,13}, {   2687,14}, {   1535,13}, {   3071,12}, \
+    {   6143,13}, {   3199,12}, {   6399,14}, {   1791,15}, \
+    {   1023,14}, {   2047,13}, {   4095,14}, {   2303,13}, \
+    {   4607,12}, {   9215,13}, {   4863,12}, {   9727,14}, \
+    {   2559,13}, {   5119,15}, {   1535,14}, {   3071,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 155
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             525  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    525, 5}, {     25, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     25, 6}, {     51, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     51, 8}, {     27, 7}, {     55, 9}, \
+    {     15, 8}, {     31, 7}, {     63, 8}, {     39, 9}, \
+    {     23, 8}, {     51,10}, {     15, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     99, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     95,11}, {     63,10}, {    143, 9}, \
+    {    287, 8}, {    575, 9}, {    303,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,10}, {    351,11}, \
+    {    191,10}, {    399, 9}, {    799,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447,12}, {    127,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703,12}, {    191,11}, \
+    {    383,10}, {    799,11}, {    415,10}, {    831,11}, \
+    {    447,13}, {    127,11}, {    543,10}, {   1087,11}, \
+    {    607,12}, {    319,11}, {    735,12}, {    383,11}, \
+    {    831,12}, {    447,11}, {    959,12}, {    511,11}, \
+    {   1023,12}, {    575,11}, {   1151,12}, {    639,11}, \
+    {   1279,12}, {    703,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    831,11}, {   1663,12}, {    895,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    639,12}, \
+    {   1343,13}, {    767,12}, {   1599,13}, {    895,14}, \
+    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
+    {   2303,13}, {   1279,14}, {    767,13}, {   1535,12}, \
+    {   3135,13}, {   1663,15}, {    511,14}, {   1023,13}, \
+    {   2047,12}, {   4095,13}, {   2303,14}, {   1279,13}, \
+    {   2559,12}, {   5119,14}, {   1535,13}, {   3071,12}, \
+    {   6143,13}, {   3199,12}, {   6399,14}, {   1791,15}, \
+    {   1023,14}, {   2047,13}, {   4095,14}, {   2303,13}, \
+    {   4607,12}, {   9215,13}, {   4863,12}, {   9727,14}, \
+    {   2559,15}, {   1535,14}, {   3071,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 154
+#define SQR_FFT_THRESHOLD                 5312
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  38
+#define MULLO_MUL_N_THRESHOLD            10950
+#define SQRLO_BASECASE_THRESHOLD            10
+#define SQRLO_DC_THRESHOLD                  35
+#define SQRLO_SQR_THRESHOLD              10323
+
+#define DC_DIV_QR_THRESHOLD                 57
+#define DC_DIVAPPR_Q_THRESHOLD             254
+#define DC_BDIV_QR_THRESHOLD                48
+#define DC_BDIV_Q_THRESHOLD                286
+
+#define INV_MULMOD_BNM1_THRESHOLD           55
+#define INV_NEWTON_THRESHOLD               252
+#define INV_APPR_THRESHOLD                 252
+
+#define BINV_NEWTON_THRESHOLD              372
+#define REDC_1_TO_REDC_2_THRESHOLD          61
+#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
+
+#define MU_DIV_QR_THRESHOLD               1858
+#define MU_DIVAPPR_Q_THRESHOLD            1787
+#define MUPI_DIV_QR_THRESHOLD              122
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               1836
+
+#define POWM_SEC_TABLE  1,14,200,480,1532
+
+#define GET_STR_DC_THRESHOLD                16
+#define GET_STR_PRECOMPUTE_THRESHOLD        33
+#define SET_STR_DC_THRESHOLD               104
+#define SET_STR_PRECOMPUTE_THRESHOLD      1120
+
+#define FAC_DSC_THRESHOLD                  164
+#define FAC_ODD_THRESHOLD                   27
+
+#define MATRIX22_STRASSEN_THRESHOLD         19
+#define HGCD2_DIV1_METHOD                    1  /* 3.70% faster than 3 */
+#define HGCD_THRESHOLD                     137
+#define HGCD_APPR_THRESHOLD                157
+#define HGCD_REDUCE_THRESHOLD             3389
+#define GCD_DC_THRESHOLD                   610
+#define GCDEXT_DC_THRESHOLD                443
+#define JACOBI_BASE_METHOD                   4  /* 12.66% faster than 1 */
+
+/* Tuneup completed successfully, took 69757 seconds */

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/logops_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/logops_n.asm
new file mode 100644
index 0000000..0602614
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/logops_n.asm

@@ -0,0 +1,253 @@
+dnl  ARM mpn_and_n, mpn_andn_n. mpn_nand_n, etc, optimised for A15.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb             cycles/limb
+C          and andn ior xor         nand iorn nior xnor
+C StrongARM	 ?			 ?
+C XScale	 ?			 ?
+C Cortex-A7	 ?			 ?
+C Cortex-A8	 ?			 ?
+C Cortex-A9	3.5			3.56
+C Cortex-A15	1.27			1.64
+
+C This is great A15 core register code, but it is a bit large.
+C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
+define(`UNROLL', 4x2)		C alternatives: 4 4x2
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',    `mpn_and_n')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',    `mpn_andn_n')
+  define(`LOGOP',   `bic	$1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',    `mpn_nand_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',    `mpn_ior_n')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',    `mpn_iorn_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `bic	$1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',    `mpn_nior_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',    `mpn_xor_n')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',    `mpn_xnor_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{ r4-r9 }
+
+ifelse(FEEDIN_VARIANT,0,`
+	ands	r6, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00a)
+	tst	r6, #1
+	beq	L(bx0)
+	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	LOGOP(	r9, r5, r7)
+	POSTOP(	r9)
+	str	r9, [rp], #4
+	tst	r6, #2
+	beq	L(b00)
+L(bx0):	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	sub	rp, rp, #8
+	b	L(lo)
+L(b00):	tst	n, n
+	beq	L(wd1)
+L(b00a):ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+')
+ifelse(FEEDIN_VARIANT,1,`
+	and	r6, n, #3
+	mov	n, n, lsr #2
+	tst	r6, #1
+	beq	L(bx0)
+	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	LOGOP(	r9, r5, r7)
+	POSTOP(	r9)
+	str	r9, [rp], #4
+L(bx0):	tst	r6, #2
+	beq	L(b00)
+	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	sub	rp, rp, #8
+	b	L(lo)
+L(b00):	tst	n, n
+	beq	L(wd1)
+	ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+')
+ifelse(FEEDIN_VARIANT,2,`
+	ands	r6, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00)
+	cmp	r6, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	POSTOP(	r9)
+	str	r9, [rp], #-4
+	b	L(lo)
+
+L(b00):	ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+
+L(b01):	ldr	r5, [up], #-4
+	ldr	r7, [vp], #-4
+	LOGOP(	r9, r5, r7)
+	POSTOP(	r9)
+	str	r9, [rp], #-12
+	tst	n, n
+	beq	L(wd1)
+L(gt1):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	b	L(mid)
+
+L(b10):	ldrd	r4, r5, [up]
+	ldrd	r6, r7, [vp]
+	sub	rp, rp, #8
+	b	L(lo)
+')
+	ALIGN(16)
+ifelse(UNROLL,4,`
+L(top):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #8]
+L(mid):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #16]!
+	ldrd	r6, r7, [vp, #16]!
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #16]!
+	sub	n, n, #1
+L(lo):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	tst	n, n
+	bne	L(top)
+')
+ifelse(UNROLL,4x2,`
+L(top):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #8]
+L(mid):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #16]
+	ldrd	r6, r7, [vp, #16]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #16]
+	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	sub	n, n, #2
+	tst	n, n
+	bmi	L(dne)
+	ldrd	r4, r5, [up, #24]
+	ldrd	r6, r7, [vp, #24]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #24]
+	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #32]!
+	ldrd	r6, r7, [vp, #32]!
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #32]!
+L(lo):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	tst	n, n
+	bne	L(top)
+')
+
+L(end):	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #8]
+L(wd1):	pop	{ r4-r9 }
+	bx	r14
+ifelse(UNROLL,4x2,`
+L(dne):	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #24]
+	pop	{ r4-r9 }
+	bx	r14
+')
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/mul_1.asm b/third_party/gmp/mpn/arm/v7a/cora15/mul_1.asm
new file mode 100644
index 0000000..766ba5c
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/mul_1.asm

@@ -0,0 +1,104 @@
+dnl  ARM mpn_mul_1 optimised for A15.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 5.25			3.25
+C Cortex-A15	 2.25			this
+
+
+C This runs well on A15 but very poorly on A9.  By scheduling loads and adds
+C it is possible to get good A9 performance as well, but at the cost of using
+C many more (callee-saves) registers.
+
+C This is armv5 code, optimized for the armv7a cpu A15.  Its location in the
+C GMP file structure might be misleading.
+
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`v0', `r3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1c)
+	ldr	r12, [sp]
+	b	L(ent)
+EPILOGUE()
+PROLOGUE(mpn_mul_1)
+	mov	r12, #0
+L(ent):	push	{r4-r7}
+
+	ldr	r6, [up], #4
+	tst	n, #1
+	beq	L(bx0)
+
+L(bx1):	umull	r4, r7, r6, v0
+	adds	r4, r4, r12
+	tst	n, #2
+	beq	L(lo1)
+	b	L(lo3)
+
+L(bx0):	umull	r4, r5, r6, v0
+	adds	r4, r4, r12
+	tst	n, #2
+	beq	L(lo0)
+	b	L(lo2)
+
+L(top):	ldr	r6, [up], #4
+	str	r4, [rp], #4
+	umull	r4, r5, r6, v0
+	adds	r4, r4, r7
+L(lo0):	ldr	r6, [up], #4
+	str	r4, [rp], #4
+	umull	r4, r7, r6, v0
+	adcs	r4, r4, r5
+L(lo3):	ldr	r6, [up], #4
+	str	r4, [rp], #4
+	umull	r4, r5, r6, v0
+	adcs	r4, r4, r7
+L(lo2):	ldr	r6, [up], #4
+	str	r4, [rp], #4
+	umull	r4, r7, r6, v0
+	adcs	r4, r4, r5
+L(lo1):	adc	r7, r7, #0
+	subs	n, n, #4
+	bgt	L(top)
+
+	str	r4, [rp]
+	mov	r0, r7
+	pop	{r4-r7}
+	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
new file mode 100644
index 0000000..d8cfe3f
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm

@@ -0,0 +1,43 @@
+dnl  ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH,		1)
+
+ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
+
+include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
new file mode 100644
index 0000000..b48204d
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm

@@ -0,0 +1,43 @@
+dnl  ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH,		2)
+
+ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
+
+include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
new file mode 100644
index 0000000..51f93c1
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm

@@ -0,0 +1,144 @@
+dnl  ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 5.25
+C Cortex-A15	 2.25
+
+C TODO
+C  * Consider using 4-way feed-in code.
+C  * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
+C    insufficiently for A7 and A8.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`DO_add', `
+  define(`ADCSBCS',	`adcs	$1, $2, $3')
+  define(`CLRCY',	`cmn	r13, #1')
+  define(`RETVAL',	`adc	r0, $1, #0')
+  define(`func',	mpn_addlsh`'LSH`'_n)')
+ifdef(`DO_sub', `
+  define(`ADCSBCS',	`sbcs	$1, $2, $3')
+  define(`CLRCY',	`cmp	r13, #0')
+  define(`RETVAL',	`sbc	$2, $2, $2
+			cmn	$2, #1
+			adc	r0, $1, #0')
+  define(`func',	mpn_sublsh`'LSH`'_n)')
+ifdef(`DO_rsb', `
+  define(`ADCSBCS',	`sbcs	$1, $3, $2')
+  define(`CLRCY',	`cmp	r13, #0')
+  define(`RETVAL',	`sbc	r0, $1, #0')
+  define(`func',	mpn_rsblsh`'LSH`'_n)')
+
+
+ASM_START()
+PROLOGUE(func)
+	push	 {r4-r10}
+	vmov.i8	 d0, #0			C could feed carry through here
+	CLRCY
+	tst	n, #1
+	beq	L(bb0)
+
+L(bb1):	vld1.32	 {d3[0]}, [vp]!
+	vsli.u32 d0, d3, #LSH
+	ldr	 r12, [up], #4
+	vmov.32	 r5, d0[0]
+	vshr.u32 d0, d3, #32-LSH
+	ADCSBCS( r12, r12, r5)
+	str	 r12, [rp], #4
+	bics	 n, n, #1
+	beq	 L(rtn)
+
+L(bb0):	tst	n, #2
+	beq	L(b00)
+
+L(b10):	vld1.32	 {d3}, [vp]!
+	vsli.u64 d0, d3, #LSH
+	ldmia	 up!, {r10,r12}
+	vmov	 r4, r5, d0
+	vshr.u64 d0, d3, #64-LSH
+	ADCSBCS( r10, r10, r4)
+	ADCSBCS( r12, r12, r5)
+	stmia	 rp!, {r10,r12}
+	bics	 n, n, #2
+	beq	 L(rtn)
+
+L(b00):	vld1.32	 {d2}, [vp]!
+	vsli.u64 d0, d2, #LSH
+	vshr.u64 d1, d2, #64-LSH
+	vld1.32	 {d3}, [vp]!
+	vsli.u64 d1, d3, #LSH
+	vmov	 r6, r7, d0
+	vshr.u64 d0, d3, #64-LSH
+	sub	 n, n, #4
+	tst	 n, n
+	beq	 L(end)
+
+	ALIGN(16)
+L(top):	ldmia	 up!, {r8,r9,r10,r12}
+	vld1.32	 {d2}, [vp]!
+	vsli.u64 d0, d2, #LSH
+	vmov	 r4, r5, d1
+	vshr.u64 d1, d2, #64-LSH
+	ADCSBCS( r8, r8, r6)
+	ADCSBCS( r9, r9, r7)
+	vld1.32	 {d3}, [vp]!
+	vsli.u64 d1, d3, #LSH
+	vmov	 r6, r7, d0
+	vshr.u64 d0, d3, #64-LSH
+	ADCSBCS( r10, r10, r4)
+	ADCSBCS( r12, r12, r5)
+	stmia	 rp!, {r8,r9,r10,r12}
+	sub	 n, n, #4
+	tst	 n, n
+	bne	 L(top)
+
+L(end):	ldmia	 up!, {r8,r9,r10,r12}
+	vmov	 r4, r5, d1
+	ADCSBCS( r8, r8, r6)
+	ADCSBCS( r9, r9, r7)
+	ADCSBCS( r10, r10, r4)
+	ADCSBCS( r12, r12, r5)
+	stmia	 rp!, {r8,r9,r10,r12}
+L(rtn):	vmov.32	 r0, d0[0]
+	RETVAL(	 r0, r1)
+	pop	 {r4-r10}
+	bx	 r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/com.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/com.asm
new file mode 100644
index 0000000..9e7a629
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/com.asm

@@ -0,0 +1,97 @@
+dnl  ARM Neon mpn_com optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.1
+C Cortex-A15	 0.65
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+	cmp		n, #7
+	ble		L(bc)
+
+C Perform a few initial operation until rp is 128-bit aligned
+	tst		rp, #4
+	beq		L(al1)
+	vld1.32		{d0[0]}, [up]!
+	sub		n, n, #1
+	vmvn		d0, d0
+	vst1.32		{d0[0]}, [rp]!
+L(al1):	tst		rp, #8
+	beq		L(al2)
+	vld1.32		{d0}, [up]!
+	sub		n, n, #2
+	vmvn		d0, d0
+	vst1.32		{d0}, [rp:64]!
+L(al2):	vld1.32		{q2}, [up]!
+	subs		n, n, #12
+	blt		L(end)
+
+	ALIGN(16)
+L(top):	vld1.32		{q0}, [up]!
+	vmvn		q2, q2
+	subs		n, n, #8
+	vst1.32		{q2}, [rp:128]!
+	vld1.32		{q2}, [up]!
+	vmvn		q0, q0
+	vst1.32		{q0}, [rp:128]!
+	bge	L(top)
+
+L(end):	vmvn		q2, q2
+	vst1.32		{q2}, [rp:128]!
+
+C Handle last 0-7 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tst		n, #4
+	beq		L(tl1)
+	vld1.32		{q0}, [up]!
+	vmvn		q0, q0
+	vst1.32		{q0}, [rp]!
+L(tl1):	tst		n, #2
+	beq		L(tl2)
+	vld1.32		{d0}, [up]!
+	vmvn		d0, d0
+	vst1.32		{d0}, [rp]!
+L(tl2):	tst		n, #1
+	beq		L(tl3)
+	vld1.32		{d0[0]}, [up]
+	vmvn		d0, d0
+	vst1.32		{d0[0]}, [rp]
+L(tl3):	bx		lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/copyd.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
new file mode 100644
index 0000000..98fe535
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/copyd.asm

@@ -0,0 +1,110 @@
+dnl  ARM Neon mpn_copyd optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.75		slower than core register code
+C Cortex-A15	 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+	add	rp, rp, n, lsl #2
+	add	up, up, n, lsl #2
+
+	cmp	n, #7
+	ble	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tst	rp, #4
+	beq	L(al1)
+	sub	up, up, #4
+	vld1.32	{d22[0]}, [up]
+	sub	n, n, #1
+	sub	rp, rp, #4
+	vst1.32	{d22[0]}, [rp]
+L(al1):	tst	rp, #8
+	beq	L(al2)
+	sub	up, up, #8
+	vld1.32	{d22}, [up]
+	sub	n, n, #2
+	sub	rp, rp, #8
+	vst1.32	{d22}, [rp:64]
+L(al2):	sub	up, up, #16
+	vld1.32	{d26-d27}, [up]
+	subs	n, n, #12
+	sub	rp, rp, #16			C offset rp for loop
+	blt	L(end)
+
+	sub	up, up, #16			C offset up for loop
+	mov	r12, #-16
+
+	ALIGN(16)
+L(top):	vld1.32	{d22-d23}, [up], r12
+	vst1.32	{d26-d27}, [rp:128], r12
+	vld1.32	{d26-d27}, [up], r12
+	vst1.32	{d22-d23}, [rp:128], r12
+	subs	n, n, #8
+	bge	L(top)
+
+	add	up, up, #16			C undo up offset
+						C rp offset undoing folded
+L(end):	vst1.32	{d26-d27}, [rp:128]
+
+C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tst	n, #4
+	beq	L(tl1)
+	sub	up, up, #16
+	vld1.32	{d22-d23}, [up]
+	sub	rp, rp, #16
+	vst1.32	{d22-d23}, [rp]
+L(tl1):	tst	n, #2
+	beq	L(tl2)
+	sub	up, up, #8
+	vld1.32	{d22}, [up]
+	sub	rp, rp, #8
+	vst1.32	{d22}, [rp]
+L(tl2):	tst	n, #1
+	beq	L(tl3)
+	sub	up, up, #4
+	vld1.32	{d22[0]}, [up]
+	sub	rp, rp, #4
+	vst1.32	{d22[0]}, [rp]
+L(tl3):	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/copyi.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
new file mode 100644
index 0000000..2e05afe
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/copyi.asm

@@ -0,0 +1,90 @@
+dnl  ARM Neon mpn_copyi optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.75		slower than core register code
+C Cortex-A15	 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+	cmp	n, #7
+	ble	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tst	rp, #4
+	beq	L(al1)
+	vld1.32	{d22[0]}, [up]!
+	sub	n, n, #1
+	vst1.32	{d22[0]}, [rp]!
+L(al1):	tst	rp, #8
+	beq	L(al2)
+	vld1.32	{d22}, [up]!
+	sub	n, n, #2
+	vst1.32	{d22}, [rp:64]!
+L(al2):	vld1.32	{d26-d27}, [up]!
+	subs	n, n, #12
+	blt	L(end)
+
+	ALIGN(16)
+L(top):	vld1.32	{d22-d23}, [up]!
+	vst1.32	{d26-d27}, [rp:128]!
+	vld1.32	{d26-d27}, [up]!
+	vst1.32	{d22-d23}, [rp:128]!
+	subs	n, n, #8
+	bge	L(top)
+
+L(end):	vst1.32	{d26-d27}, [rp:128]!
+
+C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tst	n, #4
+	beq	L(tl1)
+	vld1.32	{d22-d23}, [up]!
+	vst1.32	{d22-d23}, [rp]!
+L(tl1):	tst	n, #2
+	beq	L(tl2)
+	vld1.32	{d22}, [up]!
+	vst1.32	{d22}, [rp]!
+L(tl2):	tst	n, #1
+	beq	L(tl3)
+	vld1.32	{d22[0]}, [up]
+	vst1.32	{d22[0]}, [rp]
+L(tl3):	bx	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm b/third_party/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
new file mode 100644
index 0000000..2c11d6d
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm

@@ -0,0 +1,177 @@
+dnl  ARM Neon mpn_rsh1add_n, mpn_rsh1sub_n.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	4-5
+C Cortex-A15	 2.5
+
+C TODO
+C  * Try to make this smaller, its size (384 bytes) is excessive.
+C  * Try to reach 2.25 c/l on A15, to match the addlsh_1 family.
+C  * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
+C    insufficiently for A7 and A8.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+  define(`ADDSUBS',	`adds	$1, $2, $3')
+  define(`ADCSBCS',	`adcs	$1, $2, $3')
+  define(`IFADD',	`$1')
+  define(`IFSUB',	`')
+  define(`func',	mpn_rsh1add_n)')
+ifdef(`OPERATION_rsh1sub_n', `
+  define(`ADDSUBS',	`subs	$1, $2, $3')
+  define(`ADCSBCS',	`sbcs	$1, $2, $3')
+  define(`IFADD',	`')
+  define(`IFSUB',	`$1')
+  define(`func',	mpn_rsh1sub_n)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	 {r4-r10}
+
+	ands	r4, n, #3
+	beq	L(b00)
+	cmp	r4, #2
+	blo	L(b01)
+	beq	L(b10)
+
+L(b11):	ldmia	 up!, {r9,r10,r12}
+	ldmia	 vp!, {r5,r6,r7}
+	ADDSUBS( r9, r9, r5)
+	vmov	 d4, r9, r9
+	ADCSBCS( r10, r10, r6)
+	ADCSBCS( r12, r12, r7)
+	vshr.u64 d3, d4, #1
+	vmov	 d1, r10, r12
+	vsli.u64 d3, d1, #31
+	vshr.u64 d2, d1, #1
+	vst1.32	 d3[0], [rp]!
+	bics	 n, n, #3
+	beq	 L(wd2)
+L(gt3):	ldmia	 up!, {r8,r9,r10,r12}
+	ldmia	 vp!, {r4,r5,r6,r7}
+	b	 L(mi0)
+
+L(b10):	ldmia	 up!, {r10,r12}
+	ldmia	 vp!, {r6,r7}
+	ADDSUBS( r10, r10, r6)
+	ADCSBCS( r12, r12, r7)
+	vmov	 d4, r10, r12
+	bics	 n, n, #2
+	vshr.u64 d2, d4, #1
+	beq	 L(wd2)
+L(gt2):	ldmia	 up!, {r8,r9,r10,r12}
+	ldmia	 vp!, {r4,r5,r6,r7}
+	b	 L(mi0)
+
+L(b01):	ldr	 r12, [up], #4
+	ldr	 r7, [vp], #4
+	ADDSUBS( r12, r12, r7)
+	vmov	 d4, r12, r12
+	bics	 n, n, #1
+	bne	 L(gt1)
+	mov	 r5, r12, lsr #1
+IFADD(`	adc	 r1, n, #0')
+IFSUB(`	adc	 r1, n, #1')
+	bfi	 r5, r1, #31, #1
+	str	 r5, [rp]
+	and	 r0, r12, #1
+	pop	 {r4-r10}
+	bx	 r14
+L(gt1):	ldmia	 up!, {r8,r9,r10,r12}
+	ldmia	 vp!, {r4,r5,r6,r7}
+	vshr.u64 d2, d4, #1
+	ADCSBCS( r8, r8, r4)
+	ADCSBCS( r9, r9, r5)
+	vmov	 d0, r8, r9
+	ADCSBCS( r10, r10, r6)
+	ADCSBCS( r12, r12, r7)
+	vsli.u64 d2, d0, #31
+	vshr.u64 d3, d0, #1
+	vst1.32	 d2[0], [rp]!
+	b	 L(mi1)
+
+L(b00):	ldmia	 up!, {r8,r9,r10,r12}
+	ldmia	 vp!, {r4,r5,r6,r7}
+	ADDSUBS( r8, r8, r4)
+	ADCSBCS( r9, r9, r5)
+	vmov	 d4, r8, r9
+	ADCSBCS( r10, r10, r6)
+	ADCSBCS( r12, r12, r7)
+	vshr.u64 d3, d4, #1
+	b	 L(mi1)
+
+	ALIGN(16)
+L(top):	ldmia	 up!, {r8,r9,r10,r12}
+	ldmia	 vp!, {r4,r5,r6,r7}
+	vsli.u64 d3, d1, #63
+	vshr.u64 d2, d1, #1
+	vst1.32	 d3, [rp]!
+L(mi0):	ADCSBCS( r8, r8, r4)
+	ADCSBCS( r9, r9, r5)
+	vmov	 d0, r8, r9
+	ADCSBCS( r10, r10, r6)
+	ADCSBCS( r12, r12, r7)
+	vsli.u64 d2, d0, #63
+	vshr.u64 d3, d0, #1
+	vst1.32	 d2, [rp]!
+L(mi1):	vmov	 d1, r10, r12
+	sub	 n, n, #4
+	tst	 n, n
+	bne	 L(top)
+
+L(end):	vsli.u64 d3, d1, #63
+	vshr.u64 d2, d1, #1
+	vst1.32	 d3, [rp]!
+L(wd2):	vmov	 r4, r5, d2
+IFADD(`	adc	 r1, n, #0')
+IFSUB(`	adc	 r1, n, #1')
+	bfi	 r5, r1, #31, #1
+	stm	 rp, {r4,r5}
+
+L(rtn):	vmov.32	 r0, d4[0]
+	and	 r0, r0, #1
+	pop	 {r4-r10}
+	bx	 r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora15/submul_1.asm b/third_party/gmp/mpn/arm/v7a/cora15/submul_1.asm
new file mode 100644
index 0000000..ed7bfe8
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora15/submul_1.asm

@@ -0,0 +1,159 @@
+dnl  ARM mpn_submul_1 optimised for A15.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:     -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 5.75			3.75
+C Cortex-A15	 2.32			this
+
+C This code uses umlal and umaal for adding in the rp[] data, keeping the
+C recurrency path separate from any multiply instructions.  It performs well on
+C A15, but not quite at the multiply bandwidth like the corresponding addmul_1
+C code.
+C
+C We don't use r12 due to ldrd and strd limitations.
+C
+C This loop complements U on the fly,
+C   U' = B^n - 1 - U
+C and then uses that
+C   R - U*v = R + U'*v + v - B^n v
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`v0', `r3')
+
+define(`w0', `r10') define(`w1', `r11')
+define(`u0', `r8')  define(`u1', `r9')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	sub	sp, sp, #32
+	strd	r10, r11, [sp, #24]
+	strd	r8, r9, [sp, #16]
+	strd	r6, r7, [sp, #8]
+	strd	r4, r5, [sp, #0]
+C	push	{ r4-r11 }
+
+	ands	r6, n, #3
+	sub	n, n, #3
+	beq	L(b00)
+	cmp	r6, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	mov	r6, #0
+	ldr	u1, [up], #-4
+	ldr	w1, [rp], #-16
+	mvn	u1, u1
+	adds	r7, v0, #0
+	b	L(mid)
+
+L(b00):	ldrd	u0, u1, [up]
+	ldrd	w0, w1, [rp], #-12
+	mvn	u0, u0
+	mvn	u1, u1
+	mov	r6, v0
+	umaal	w0, r6, u0, v0
+	cmn	r13, #0			C carry clear
+	mov	r7, #0
+	str	w0, [rp, #12]
+	b	L(mid)
+
+L(b10):	ldrd	u0, u1, [up], #8
+	ldrd	w0, w1, [rp]
+	mvn	u0, u0
+	mvn	u1, u1
+	mov	r4, v0
+	umaal	w0, r4, u0, v0
+	mov	r5, #0
+	str	w0, [rp], #-4
+	umlal	w1, r5, u1, v0
+	adds	n, n, #0
+	bmi	L(end)
+	b	L(top)
+
+L(b01):	ldr	u1, [up], #4
+	ldr	w1, [rp], #-8
+	mvn	u1, u1
+	mov	r5, v0
+	mov	r4, #0
+	umaal	w1, r5, u1, v0
+	tst	n, n
+	bmi	L(end)
+
+C	ALIGN(16)
+L(top):	ldrd	u0, u1, [up, #0]
+	adcs	r4, r4, w1
+	mvn	u0, u0
+	ldrd	w0, w1, [rp, #12]
+	mvn	u1, u1
+	mov	r6, #0
+	umlal	w0, r6, u0, v0		C 1 2
+	adcs	r5, r5, w0
+	mov	r7, #0
+	strd	r4, r5, [rp, #8]
+L(mid):	umaal	w1, r7, u1, v0		C 2 3
+	ldrd	u0, u1, [up, #8]
+	add	up, up, #16
+	adcs	r6, r6, w1
+	mvn	u0, u0
+	ldrd	w0, w1, [rp, #20]
+	mvn	u1, u1
+	mov	r4, #0
+	umlal	w0, r4, u0, v0		C 3 4
+	adcs	r7, r7, w0
+	mov	r5, #0
+	strd	r6, r7, [rp, #16]!
+	sub	n, n, #4
+	umlal	w1, r5, u1, v0		C 0 1
+	tst	n, n
+	bpl	L(top)
+
+L(end):	adcs	r4, r4, w1
+	str	r4, [rp, #8]
+	adc	r0, r5, #0
+	sub	r0, v0, r0
+	pop	{ r4-r11 }
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora17/addmul_1.asm b/third_party/gmp/mpn/arm/v7a/cora17/addmul_1.asm
new file mode 100644
index 0000000..c11ed47
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora17/addmul_1.asm

@@ -0,0 +1,34 @@
+dnl  ARM mpn_addmul_1
+
+dnl  Copyright 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_addmul_1)
+include_mpn(`arm/v6/addmul_1.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora17/gmp-mparam.h b/third_party/gmp/mpn/arm/v7a/cora17/gmp-mparam.h
new file mode 100644
index 0000000..143d4bc
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora17/gmp-mparam.h

@@ -0,0 +1,233 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1800 MHz Cortex-A17 with Neon (in spite of file position) */
+/* FFT tuning limit = 51243975 */
+/* Generated by tuneup.c, 2019-10-29, gcc 6.3 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1  /* 54.08% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           45
+
+#define DIV_1_VS_MUL_1_PERCENT             248
+
+#define MUL_TOOM22_THRESHOLD                38
+#define MUL_TOOM33_THRESHOLD               132
+#define MUL_TOOM44_THRESHOLD               200
+#define MUL_TOOM6H_THRESHOLD               303
+#define MUL_TOOM8H_THRESHOLD               478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     137
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     179
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     145
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     191
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 62
+#define SQR_TOOM3_THRESHOLD                189
+#define SQR_TOOM4_THRESHOLD                354
+#define SQR_TOOM6_THRESHOLD                426
+#define SQR_TOOM8_THRESHOLD                608
+
+#define MULMID_TOOM42_THRESHOLD             62
+
+#define MULMOD_BNM1_THRESHOLD               21
+#define SQRMOD_BNM1_THRESHOLD               29
+
+#define MUL_FFT_MODF_THRESHOLD             595  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    595, 5}, {     29, 6}, {     15, 5}, {     31, 6}, \
+    {     16, 5}, {     33, 6}, {     29, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
+    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 7}, {     55, 9}, {     15, 8}, {     31, 7}, \
+    {     63, 8}, {     43, 9}, {     23, 8}, {     55, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     83, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
+    {     95, 9}, {    191,10}, {    111,11}, {     63,10}, \
+    {    143, 8}, {    575,10}, {    159,11}, {     95,10}, \
+    {    191, 9}, {    383, 8}, {    767, 9}, {    399, 8}, \
+    {    799,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511, 8}, {   1023, 9}, {    543, 8}, {   1087, 9}, \
+    {    575,10}, {    303,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,10}, {    351, 9}, \
+    {    703,10}, {    367, 9}, {    735,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
+    {    415, 9}, {    831,10}, {    431, 9}, {    863,11}, \
+    {    223,10}, {    447,12}, {    127,10}, {    511, 9}, \
+    {   1023,10}, {    543, 9}, {   1087,10}, {    607, 9}, \
+    {   1215,11}, {    319,10}, {    671, 9}, {   1343,11}, \
+    {    351,10}, {    735,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    863,11}, {    447,10}, \
+    {    895,13}, {    127,11}, {    511,10}, {   1023,11}, \
+    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
+    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
+    {    863,10}, {   1727,12}, {    447,11}, {    991,10}, \
+    {   1983,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,10}, {   2431,12}, {    639,11}, {   1343,12}, \
+    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
+    {   1599,12}, {    831,11}, {   1727,12}, {    959,11}, \
+    {   1983,13}, {    511,12}, {   1087,11}, {   2239,12}, \
+    {   1215,11}, {   2431,13}, {    639,12}, {   1471,11}, \
+    {   2943,13}, {    767,12}, {   1727,13}, {    895,12}, \
+    {   1983,14}, {    511,13}, {   1023,12}, {   2239,13}, \
+    {   1151,12}, {   2495,13}, {   1279,12}, {   2623,13}, \
+    {   1407,12}, {   2943,14}, {    767,13}, {   1535,12}, \
+    {   3135,13}, {   1663,12}, {   3455,13}, {   1919,12}, \
+    {   3839,15}, {    511,14}, {   1023,13}, {   2175,12}, \
+    {   4479,13}, {   2431,14}, {   1279,13}, {   2943,12}, \
+    {   5887,14}, {   1535,13}, {   3455,14}, {   1791,13}, \
+    {   3967,15}, {   1023,14}, {   2047,13}, {   4479,14}, \
+    {   2303,13}, {   4991,12}, {   9983,14}, {   2559,13}, \
+    {   5247,14}, {   2815,13}, {   5887,15}, {   1535,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 194
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             500  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    500, 5}, {     29, 6}, {     15, 5}, {     31, 6}, \
+    {     16, 5}, {     33, 6}, {     29, 7}, {     15, 6}, \
+    {     32, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
+    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
+    {     95, 9}, {    191,10}, {    111,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
+    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
+    {    383, 8}, {    767, 9}, {    399,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,10}, {    351, 9}, {    703,10}, {    367, 9}, \
+    {    735,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399, 9}, {    799,10}, {    415, 9}, {    831,10}, \
+    {    431, 9}, {    863,10}, {    447,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
+    {    319,10}, {    671,11}, {    351,10}, {    735,12}, \
+    {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
+    {    863,11}, {    447,10}, {    895,13}, {    127,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,11}, {    607,10}, {   1215,12}, {    319,11}, \
+    {    671,10}, {   1343,11}, {    735,10}, {   1471,12}, \
+    {    383,11}, {    799,10}, {   1599,11}, {    863,12}, \
+    {    447,11}, {    959,10}, {   1919,11}, {    991,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,10}, {   2431,12}, {    639,11}, {   1343,12}, \
+    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
+    {   1599,12}, {    831,11}, {   1727,12}, {    959,11}, \
+    {   1919,14}, {    255,13}, {    511,12}, {   1087,11}, \
+    {   2239,12}, {   1215,11}, {   2431,13}, {    639,12}, \
+    {   1471,11}, {   2943,13}, {    767,12}, {   1727,13}, \
+    {    895,12}, {   1983,14}, {    511,13}, {   1023,12}, \
+    {   2239,13}, {   1151,12}, {   2495,13}, {   1279,12}, \
+    {   2623,13}, {   1407,12}, {   2943,14}, {    767,13}, \
+    {   1535,12}, {   3071,13}, {   1663,12}, {   3455,13}, \
+    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
+    {   2175,12}, {   4479,13}, {   2431,14}, {   1279,13}, \
+    {   2943,12}, {   5887,14}, {   1535,13}, {   3455,14}, \
+    {   1791,13}, {   3967,15}, {   1023,14}, {   2047,13}, \
+    {   4479,14}, {   2303,13}, {   4991,12}, {   9983,14}, \
+    {   2559,13}, {   5119,14}, {   2815,13}, {   5887,15}, \
+    {   1535,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 199
+#define SQR_FFT_THRESHOLD                 4736
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  27
+#define MULLO_MUL_N_THRESHOLD            13463
+#define SQRLO_BASECASE_THRESHOLD             0  /* always */
+#define SQRLO_DC_THRESHOLD                  26
+#define SQRLO_SQR_THRESHOLD               8907
+
+#define DC_DIV_QR_THRESHOLD                 38
+#define DC_DIVAPPR_Q_THRESHOLD             103
+#define DC_BDIV_QR_THRESHOLD                44
+#define DC_BDIV_Q_THRESHOLD                 98
+
+#define INV_MULMOD_BNM1_THRESHOLD           78
+#define INV_NEWTON_THRESHOLD               165
+#define INV_APPR_THRESHOLD                 115
+
+#define BINV_NEWTON_THRESHOLD              296
+#define REDC_1_TO_REDC_2_THRESHOLD           2
+#define REDC_2_TO_REDC_N_THRESHOLD         147
+
+#define MU_DIV_QR_THRESHOLD               2089
+#define MU_DIVAPPR_Q_THRESHOLD            2089
+#define MUPI_DIV_QR_THRESHOLD               70
+#define MU_BDIV_QR_THRESHOLD              1718
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define POWM_SEC_TABLE  7,19,107,480,1486
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        29
+#define SET_STR_DC_THRESHOLD               126
+#define SET_STR_PRECOMPUTE_THRESHOLD       541
+
+#define FAC_DSC_THRESHOLD                  132
+#define FAC_ODD_THRESHOLD                   29
+
+#define MATRIX22_STRASSEN_THRESHOLD         30
+#define HGCD2_DIV1_METHOD                    1  /* 6.55% faster than 3 */
+#define HGCD_THRESHOLD                      54
+#define HGCD_APPR_THRESHOLD                 52
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   303
+#define GCDEXT_DC_THRESHOLD                225
+#define JACOBI_BASE_METHOD                   4  /* 9.73% faster than 1 */
+
+/* Tuneup completed successfully, took 111418 seconds */

diff --git a/third_party/gmp/mpn/arm/v7a/cora17/mod_34lsub1.asm b/third_party/gmp/mpn/arm/v7a/cora17/mod_34lsub1.asm
new file mode 100644
index 0000000..39e5a15
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora17/mod_34lsub1.asm

@@ -0,0 +1,121 @@
+dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2012, 2013, 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A5	 2.67
+C Cortex-A7	 2.37
+C Cortex-A8	 2.34
+C Cortex-A9	 ?
+C Cortex-A15	 1.39
+C Cortex-A17	 1.60
+C Cortex-A53	 2.51
+
+define(`ap',	r0)
+define(`n',	r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * Write cleverer summation code.
+C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+	push	{ r4, r5, r6, r7 }
+
+	subs	n, n, #3
+	mov	r7, #0
+	blt	L(le2)			C n <= 2
+
+	ldmia	ap!, { r2, r3, r12 }
+	subs	n, n, #3
+	blt	L(sum)			C n <= 5
+	mov	r7, #0
+	b	L(mid)
+
+L(top):	adds	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+	adc	r7, r7, #0
+L(mid):	ldmia	ap!, { r4, r5, r6 }
+	subs	n, n, #3
+	bpl	L(top)
+
+	adds	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+	adc	r7, r7, #0		C r7 <= 1
+
+L(sum):	cmn	n, #2
+	movlo	r4, #0
+	ldrhs	r4, [ap], #4
+	movls	r5, #0
+	ldrhi	r5, [ap], #4
+
+	adds	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, #0
+	adc	r7, r7, #0		C r7 <= 2
+
+L(sum2):
+	bic	r0, r2, #0xff000000
+	add	r0, r0, r2, lsr #24
+	add	r0, r0, r7
+
+	mov	r7, r3, lsl #8
+	bic	r2, r7, #0xff000000
+	add	r0, r0, r2
+	add	r0, r0, r3, lsr #16
+
+	mov	r2, r12, lsl #16
+	bic	r1, r2, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r12, lsr #8
+
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+
+L(le2):	cmn	n, #1
+	bne	L(1)
+	ldmia	ap!, { r2, r3 }
+	mov	r12, #0
+	b	L(sum2)
+L(1):	ldr	r2, [ap]
+	bic	r0, r2, #0xff000000
+	add	r0, r0, r2, lsr #24
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora17/mul_1.asm b/third_party/gmp/mpn/arm/v7a/cora17/mul_1.asm
new file mode 100644
index 0000000..d9b6042
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora17/mul_1.asm

@@ -0,0 +1,34 @@
+dnl  ARM mpn_mul_1
+
+dnl  Copyright 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mul_1)
+include_mpn(`arm/v6/mul_1.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora17/submul_1.asm b/third_party/gmp/mpn/arm/v7a/cora17/submul_1.asm
new file mode 100644
index 0000000..f3e8139
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora17/submul_1.asm

@@ -0,0 +1,34 @@
+dnl  ARM mpn_submul_1
+
+dnl  Copyright 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_submul_1)
+include_mpn(`arm/v6/submul_1.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora5/gmp-mparam.h b/third_party/gmp/mpn/arm/v7a/cora5/gmp-mparam.h
new file mode 100644
index 0000000..e3564e0
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora5/gmp-mparam.h

@@ -0,0 +1,205 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1500 MHz Cortex-A5 (odroid c1) */
+/* FFT tuning limit = 18,235,562 */
+/* Generated by tuneup.c, 2019-10-22, gcc 4.9 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     23
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1  /* 132.79% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           52
+
+#define DIV_1_VS_MUL_1_PERCENT             213
+
+#define MUL_TOOM22_THRESHOLD                48
+#define MUL_TOOM33_THRESHOLD               143
+#define MUL_TOOM44_THRESHOLD               262
+#define MUL_TOOM6H_THRESHOLD               414
+#define MUL_TOOM8H_THRESHOLD               527
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     153
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     168
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     152
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     180
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     226
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 66
+#define SQR_TOOM3_THRESHOLD                149
+#define SQR_TOOM4_THRESHOLD                348
+#define SQR_TOOM6_THRESHOLD                517
+#define SQR_TOOM8_THRESHOLD                608
+
+#define MULMID_TOOM42_THRESHOLD             70
+
+#define MULMOD_BNM1_THRESHOLD               26
+#define SQRMOD_BNM1_THRESHOLD               28
+
+#define MUL_FFT_MODF_THRESHOLD             660  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    660, 5}, {     29, 6}, {     15, 5}, {     33, 6}, \
+    {     17, 5}, {     35, 6}, {     29, 7}, {     15, 6}, \
+    {     37, 7}, {     19, 6}, {     40, 7}, {     21, 6}, \
+    {     43, 7}, {     37, 8}, {     19, 7}, {     43, 8}, \
+    {     23, 7}, {     51, 8}, {     27, 7}, {     55, 8}, \
+    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
+    {     83, 9}, {     47, 8}, {     99, 9}, {     55,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    167,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399, 9}, {    799,10}, {    415,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    511, 9}, {   1023,10}, \
+    {    543,11}, {    287,10}, {    607,11}, {    319,10}, \
+    {    671,11}, {    351,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,11}, {    575,10}, {   1151,11}, {    607,12}, \
+    {    319,11}, {    703,12}, {    383,11}, {    831,12}, \
+    {    447,11}, {    895,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1183,12}, {    639,11}, \
+    {   1279,12}, {    703,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    895,14}, {    255,13}, {    511,12}, \
+    {   1151,13}, {    639,12}, {   1407,13}, {    767,12}, \
+    {   1599,13}, {    895,12}, {   1791,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2367,13}, \
+    {   1279,12}, {   2559,13}, {   1407,14}, {    767,13}, \
+    {   1535,12}, {   3071,13}, {   1663,12}, {   3327,13}, \
+    {   1791,15}, {    511,14}, {   1023,13}, {   2175,12}, \
+    {   4351,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 140
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             590  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    590, 5}, {     33, 6}, {     17, 5}, {     35, 6}, \
+    {     36, 7}, {     19, 6}, {     40, 7}, {     21, 6}, \
+    {     43, 7}, {     23, 6}, {     47, 7}, {     37, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    167,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    335, 9}, {    671,10}, {    351,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    415,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    575, 9}, {   1151,10}, \
+    {    607,11}, {    319,10}, {    671,11}, {    351,12}, \
+    {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
+    {    831,13}, {    127,12}, {    255,11}, {    511,10}, \
+    {   1023,11}, {    543,10}, {   1087,11}, {    575,10}, \
+    {   1151,11}, {    607,12}, {    319,11}, {    735,12}, \
+    {    383,11}, {    831,12}, {    447,11}, {    927,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1151,12}, {    639,11}, {   1279,12}, {    703,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
+    {   1663,12}, {    895,11}, {   1791,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1023,11}, {   2047,12}, \
+    {   1151,13}, {    639,12}, {   1407,13}, {    767,12}, \
+    {   1599,13}, {    895,12}, {   1791,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2367,13}, \
+    {   1279,12}, {   2559,13}, {   1407,14}, {    767,13}, \
+    {   1535,12}, {   3071,13}, {   1663,12}, {   3327,13}, \
+    {   1791,15}, {    511,14}, {   1023,13}, {   2175,12}, \
+    {   4351,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 144
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  39
+#define MULLO_MUL_N_THRESHOLD            14709
+#define SQRLO_BASECASE_THRESHOLD             8
+#define SQRLO_DC_THRESHOLD                  33
+#define SQRLO_SQR_THRESHOLD              11278
+
+#define DC_DIV_QR_THRESHOLD                 36
+#define DC_DIVAPPR_Q_THRESHOLD             116
+#define DC_BDIV_QR_THRESHOLD                48
+#define DC_BDIV_Q_THRESHOLD                140
+
+#define INV_MULMOD_BNM1_THRESHOLD           95
+#define INV_NEWTON_THRESHOLD               181
+#define INV_APPR_THRESHOLD                 125
+
+#define BINV_NEWTON_THRESHOLD              327
+#define REDC_1_TO_REDC_2_THRESHOLD           0  /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD         152
+
+#define MU_DIV_QR_THRESHOLD               2350
+#define MU_DIVAPPR_Q_THRESHOLD            2130
+#define MUPI_DIV_QR_THRESHOLD               98
+#define MU_BDIV_QR_THRESHOLD              1970
+#define MU_BDIV_Q_THRESHOLD               2172
+
+#define POWM_SEC_TABLE  6,37,108,624,2351
+
+#define GET_STR_DC_THRESHOLD                28
+#define GET_STR_PRECOMPUTE_THRESHOLD        44
+#define SET_STR_DC_THRESHOLD               309
+#define SET_STR_PRECOMPUTE_THRESHOLD       762
+
+#define FAC_DSC_THRESHOLD                  236
+#define FAC_ODD_THRESHOLD                   29
+
+#define MATRIX22_STRASSEN_THRESHOLD         25
+#define HGCD2_DIV1_METHOD                    5  /* 2.92% faster than 3 */
+#define HGCD_THRESHOLD                      70
+#define HGCD_APPR_THRESHOLD                 59
+#define HGCD_REDUCE_THRESHOLD             4120
+#define GCD_DC_THRESHOLD                   229
+#define GCDEXT_DC_THRESHOLD                233
+#define JACOBI_BASE_METHOD                   1  /* 17.07% faster than 4 */
+
+/* Tuneup completed successfully, took 47845 seconds */

diff --git a/third_party/gmp/mpn/arm/v7a/cora7/gmp-mparam.h b/third_party/gmp/mpn/arm/v7a/cora7/gmp-mparam.h
new file mode 100644
index 0000000..78de045
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora7/gmp-mparam.h

@@ -0,0 +1,202 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 900 MHz Cortex-A7 (raspberry pi2) */
+/* FFT tuning limit = 21,559,921 */
+/* Generated by tuneup.c, 2019-10-22, gcc 8.3 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1  /* 64.16% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           48
+
+#define DIV_1_VS_MUL_1_PERCENT             216
+
+#define MUL_TOOM22_THRESHOLD                39
+#define MUL_TOOM33_THRESHOLD               129
+#define MUL_TOOM44_THRESHOLD               196
+#define MUL_TOOM6H_THRESHOLD               327
+#define MUL_TOOM8H_THRESHOLD               478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     183
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     144
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     190
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 52
+#define SQR_TOOM3_THRESHOLD                162
+#define SQR_TOOM4_THRESHOLD                268
+#define SQR_TOOM6_THRESHOLD                399
+#define SQR_TOOM8_THRESHOLD                547
+
+#define MULMID_TOOM42_THRESHOLD             50
+
+#define MULMOD_BNM1_THRESHOLD               21
+#define SQRMOD_BNM1_THRESHOLD               25
+
+#define MUL_FFT_MODF_THRESHOLD             636  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    636, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     43, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 7}, {     55, 8}, \
+    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     83, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
+    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,10}, {    351,11}, \
+    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
+    {    799,10}, {    415,11}, {    223,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,10}, {    543,11}, \
+    {    287,10}, {    607,11}, {    319,10}, {    671,11}, \
+    {    351,12}, {    191,11}, {    383,10}, {    799,11}, \
+    {    415,10}, {    831,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,11}, {    543,10}, {   1087,11}, \
+    {    607,12}, {    319,11}, {    735,12}, {    383,11}, \
+    {    863,12}, {    447,11}, {    959,13}, {    255,12}, \
+    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
+    {    639,11}, {   1279,12}, {    703,13}, {    383,12}, \
+    {    767,11}, {   1599,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1215,13}, {    639,12}, {   1471,13}, \
+    {    767,12}, {   1663,13}, {    895,12}, {   1855,14}, \
+    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
+    {   2431,13}, {   1407,14}, {    767,13}, {   1663,12}, \
+    {   3327,13}, {   1791,15}, {    511,14}, {   1023,13}, \
+    {   2431,14}, {   1279,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 133
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             535  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    535, 5}, {     25, 6}, {     13, 5}, {     28, 6}, \
+    {     15, 5}, {     31, 6}, {     29, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
+    {     39, 7}, {     29, 8}, {     15, 7}, {     37, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    143, 9}, \
+    {    287,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,10}, {    287,11}, {    159,10}, \
+    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
+    {    351,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399, 9}, {    799,10}, {    415, 9}, {    831,11}, \
+    {    223,12}, {    127,10}, {    543,11}, {    287,10}, \
+    {    607,11}, {    319,10}, {    671,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    799,11}, \
+    {    415,10}, {    831,13}, {    127,11}, {    511,10}, \
+    {   1023,11}, {    543,10}, {   1087,11}, {    607,12}, \
+    {    319,11}, {    735,12}, {    383,11}, {    863,12}, \
+    {    447,11}, {    991,12}, {    511,11}, {   1087,12}, \
+    {    575,11}, {   1215,12}, {    639,11}, {   1279,12}, \
+    {    703,13}, {    383,12}, {    767,11}, {   1535,12}, \
+    {    831,11}, {   1663,12}, {    959,13}, {    511,12}, \
+    {   1215,13}, {    639,12}, {   1471,13}, {    767,12}, \
+    {   1663,13}, {    895,12}, {   1855,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
+    {   1407,14}, {    767,13}, {   1791,15}, {    511,14}, \
+    {   1023,13}, {   2431,14}, {   1279,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 134
+#define SQR_FFT_THRESHOLD                 4736
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  27
+#define MULLO_MUL_N_THRESHOLD            13463
+#define SQRLO_BASECASE_THRESHOLD             5
+#define SQRLO_DC_THRESHOLD                  31
+#define SQRLO_SQR_THRESHOLD               9449
+
+#define DC_DIV_QR_THRESHOLD                 28
+#define DC_DIVAPPR_Q_THRESHOLD              90
+#define DC_BDIV_QR_THRESHOLD                32
+#define DC_BDIV_Q_THRESHOLD                110
+
+#define INV_MULMOD_BNM1_THRESHOLD           78
+#define INV_NEWTON_THRESHOLD               134
+#define INV_APPR_THRESHOLD                  98
+
+#define BINV_NEWTON_THRESHOLD              278
+#define REDC_1_TO_REDC_2_THRESHOLD           4
+#define REDC_2_TO_REDC_N_THRESHOLD         123
+
+#define MU_DIV_QR_THRESHOLD               1718
+#define MU_DIVAPPR_Q_THRESHOLD            1685
+#define MUPI_DIV_QR_THRESHOLD               62
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               1718
+
+#define POWM_SEC_TABLE  1,22,95,563,1955
+
+#define GET_STR_DC_THRESHOLD                28
+#define GET_STR_PRECOMPUTE_THRESHOLD        51
+#define SET_STR_DC_THRESHOLD               182
+#define SET_STR_PRECOMPUTE_THRESHOLD       638
+
+#define FAC_DSC_THRESHOLD                  153
+#define FAC_ODD_THRESHOLD                   56
+
+#define MATRIX22_STRASSEN_THRESHOLD         25
+#define HGCD2_DIV1_METHOD                    1  /* 5.04% faster than 3 */
+#define HGCD_THRESHOLD                      55
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             3389
+#define GCD_DC_THRESHOLD                   153
+#define GCDEXT_DC_THRESHOLD                180
+#define JACOBI_BASE_METHOD                   1  /* 30.60% faster than 4 */
+
+/* Tuneup completed successfully, took 75202 seconds */

diff --git a/third_party/gmp/mpn/arm/v7a/cora8/bdiv_q_1.asm b/third_party/gmp/mpn/arm/v7a/cora8/bdiv_q_1.asm
new file mode 100644
index 0000000..e74b260
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora8/bdiv_q_1.asm

@@ -0,0 +1,158 @@
+dnl  ARM v6 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+dnl  This is v6 code but it runs well on just the v7a Cortex-A8, A9, and A15.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C               norm   unorm
+C 1176		 -	 -
+C Cortex-A5	 9	13
+C Cortex-A7	12	18
+C Cortex-A8	13	14
+C Cortex-A9	 9	10		not measured since latest edits
+C Cortex-A15	 7	 7
+C Cortex-A53	16	24
+
+C Architecture requirements:
+C v5	-
+C v5t	clz
+C v5te	-
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`d',   `r3')
+define(`di_arg',  `sp[0]')		C	just mpn_pi1_bdiv_q_1
+define(`cnt_arg', `sp[4]')		C	just mpn_pi1_bdiv_q_1
+
+define(`cy',  `r7')
+define(`cnt', `r6')
+define(`tnc', `r4')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+	push	{r6-r11}
+
+	rsb	r10, d, #0
+	and	r10, r10, d
+	clz	r10, r10
+	rsbs	cnt, r10, #31		C count_trailing_zeros
+	mov	d, d, lsr cnt
+
+C binvert limb
+	LEA(	r10, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r10, [r10, r12, lsr #1]
+	mul	r12, r10, r10
+	mul	r12, d, r12
+	rsb	r12, r12, r10, lsl #1
+	mul	r10, r12, r12
+	mul	r10, d, r10
+	rsb	r10, r10, r12, lsl #1	C r10 = inverse
+	b	L(pi1)
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	push	{r6-r11}
+
+	ldr	cnt, [sp, #28]
+	ldr	r10, [sp, #24]
+	cmp	cnt, #0
+
+L(pi1):	ldr	r11, [up], #4		C up[0]
+	mov	cy, #0
+	rsb	r8, r10, #0		C r8 = -inverse
+	bne	L(unorm)
+
+L(norm):
+	subs	n, n, #1
+	mul	r11, r11, r10
+	beq	L(edn)
+
+	ALIGN(16)
+L(tpn):	ldr	r9, [up], #4
+	mov	r12, #0
+	str	r11, [rp], #4
+	umaal	r12, cy, r11, d
+	mul	r11, r9, r10
+	mla	r11, cy, r8, r11
+	subs	n, n, #1
+	bne	L(tpn)
+
+L(edn):	str	r11, [rp]
+	pop	{r6-r11}
+	bx	r14
+
+L(unorm):
+	push	{r4-r5}
+	rsb	tnc, cnt, #32
+	mov	r5, r11, lsr cnt
+	subs	n, n, #1
+	beq	L(ed1)
+
+	ldr	r12, [up], #4
+	orr	r9, r5, r12, lsl tnc
+	mov	r5, r12, lsr cnt
+	mul	r11, r9, r10
+	subs	n, n, #1
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r5, r12, lsl tnc
+	mov	r5, r12, lsr cnt
+	mov	r12, #0
+	str	r11, [rp], #4
+	umaal	r12, cy, r11, d
+	mul	r11, r9, r10
+	mla	r11, cy, r8, r11
+	subs	n, n, #1
+	bne	L(tpu)
+
+L(edu):	str	r11, [rp], #4
+	mov	r12, #0
+	umaal	r12, cy, r11, d
+	mul	r11, r5, r10
+	mla	r11, cy, r8, r11
+	str	r11, [rp]
+	pop	{r4-r11}
+	bx	r14
+
+L(ed1):	mul	r11, r5, r10
+	str	r11, [rp]
+	pop	{r4-r11}
+	bx	r14
+EPILOGUE()

diff --git a/third_party/gmp/mpn/arm/v7a/cora8/gmp-mparam.h b/third_party/gmp/mpn/arm/v7a/cora8/gmp-mparam.h
new file mode 100644
index 0000000..5864841
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora8/gmp-mparam.h

@@ -0,0 +1,207 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1000 MHz Cortex-A8 (beaglebone black) */
+/* FFT tuning limit = 9,464,348 */
+/* Generated by tuneup.c, 2019-10-23, gcc 6.3 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1  /* 50.65% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           31
+
+#define DIV_1_VS_MUL_1_PERCENT             192
+
+#define MUL_TOOM22_THRESHOLD                39
+#define MUL_TOOM33_THRESHOLD               129
+#define MUL_TOOM44_THRESHOLD               226
+#define MUL_TOOM6H_THRESHOLD               366
+#define MUL_TOOM8H_THRESHOLD               620
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     141
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     183
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     154
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     160
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     193
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 46
+#define SQR_TOOM3_THRESHOLD                145
+#define SQR_TOOM4_THRESHOLD                375
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                547
+
+#define MULMID_TOOM42_THRESHOLD             38
+
+#define MULMOD_BNM1_THRESHOLD               22
+#define SQRMOD_BNM1_THRESHOLD               23
+
+#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    476, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
+    {     28, 7}, {     15, 6}, {     33, 7}, {     19, 6}, \
+    {     39, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
+    {     99, 9}, {     55,10}, {     31, 9}, {     87,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    167,10}, {     95, 9}, \
+    {    199,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143, 9}, {    287, 8}, {    575,10}, \
+    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
+    {    383, 8}, {    767, 9}, {    399,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,10}, {    351, 9}, {    703,10}, {    367,11}, \
+    {    191,10}, {    399, 9}, {    799,10}, {    415,11}, \
+    {    223,12}, {    127,11}, {    255,10}, {    543,11}, \
+    {    287,10}, {    607, 9}, {   1215,11}, {    319,10}, \
+    {    671,11}, {    351,10}, {    703,12}, {    191,11}, \
+    {    383,10}, {    799,11}, {    415,10}, {    863,11}, \
+    {    447,13}, {    127,12}, {    255,11}, {    543,10}, \
+    {   1087,11}, {    607,12}, {    319,11}, {    671,10}, \
+    {   1343,11}, {    735,12}, {    383,11}, {    799,10}, \
+    {   1599,11}, {    863,12}, {    447,11}, {    959,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1343,12}, {    703,13}, \
+    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
+    {   1663,12}, {    959,14}, {    255,13}, {    511,12}, \
+    {   1215,13}, {    639,12}, {   1407,13}, {    767,12}, \
+    {   1663,13}, {    895,12}, {   1791,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 139
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             436  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    436, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
+    {     43, 9}, {     23, 8}, {     55,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159, 8}, \
+    {    319, 9}, {    167,10}, {     95, 9}, {    191,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
+    {    575, 9}, {    303,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383, 8}, {    767, 9}, \
+    {    399,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287, 9}, \
+    {    575,10}, {    303,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,10}, {    351, 9}, \
+    {    703,10}, {    367,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399, 9}, {    799,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,10}, {    543,11}, \
+    {    287,10}, {    607,11}, {    319,10}, {    671,11}, \
+    {    351,10}, {    735,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    863,11}, {    447,10}, \
+    {    895,13}, {    127,12}, {    255,11}, {    511,10}, \
+    {   1023,11}, {    543,10}, {   1087,11}, {    607,12}, \
+    {    319,11}, {    671,10}, {   1343,11}, {    735,12}, \
+    {    383,11}, {    863,12}, {    447,11}, {    959,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    767,11}, {   1599,12}, \
+    {    831,11}, {   1663,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1215,13}, {    639,12}, {   1471,13}, \
+    {    767,12}, {   1663,13}, {    895,12}, {   1855,14}, \
+    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 152
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD            21
+#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
+#define MULLO_MUL_N_THRESHOLD            13463
+#define SQRLO_BASECASE_THRESHOLD             9
+#define SQRLO_DC_THRESHOLD                  17
+#define SQRLO_SQR_THRESHOLD               7246
+
+#define DC_DIV_QR_THRESHOLD                 27
+#define DC_DIVAPPR_Q_THRESHOLD              74
+#define DC_BDIV_QR_THRESHOLD                21
+#define DC_BDIV_Q_THRESHOLD                 64
+
+#define INV_MULMOD_BNM1_THRESHOLD           78
+#define INV_NEWTON_THRESHOLD                31
+#define INV_APPR_THRESHOLD                  37
+
+#define BINV_NEWTON_THRESHOLD              167
+#define REDC_1_TO_REDC_2_THRESHOLD           4
+#define REDC_2_TO_REDC_N_THRESHOLD         198
+
+#define MU_DIV_QR_THRESHOLD               1858
+#define MU_DIVAPPR_Q_THRESHOLD            1685
+#define MUPI_DIV_QR_THRESHOLD               43
+#define MU_BDIV_QR_THRESHOLD              1589
+#define MU_BDIV_Q_THRESHOLD               1685
+
+#define POWM_SEC_TABLE  1,13,96,487,1378
+
+#define GET_STR_DC_THRESHOLD                18
+#define GET_STR_PRECOMPUTE_THRESHOLD        36
+#define SET_STR_DC_THRESHOLD               145
+#define SET_STR_PRECOMPUTE_THRESHOLD       505
+
+#define FAC_DSC_THRESHOLD                  137
+#define FAC_ODD_THRESHOLD                   29
+
+#define MATRIX22_STRASSEN_THRESHOLD         24
+#define HGCD2_DIV1_METHOD                    5  /* 4.29% faster than 4 */
+#define HGCD_THRESHOLD                      39
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   116
+#define GCDEXT_DC_THRESHOLD                124
+#define JACOBI_BASE_METHOD                   4  /* 5.89% faster than 1 */
+
+/* Tuneup completed successfully, took 48230 seconds */

diff --git a/third_party/gmp/mpn/arm/v7a/cora9/bdiv_q_1.asm b/third_party/gmp/mpn/arm/v7a/cora9/bdiv_q_1.asm
new file mode 100644
index 0000000..245b371
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora9/bdiv_q_1.asm

@@ -0,0 +1,36 @@
+dnl  ARM mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`arm/v7a/cora8/bdiv_q_1.asm')

diff --git a/third_party/gmp/mpn/arm/v7a/cora9/gmp-mparam.h b/third_party/gmp/mpn/arm/v7a/cora9/gmp-mparam.h
new file mode 100644
index 0000000..5c54012
--- /dev/null
+++ b/third_party/gmp/mpn/arm/v7a/cora9/gmp-mparam.h

@@ -0,0 +1,211 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012-2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1000 MHz Cortex-A9 */
+/* FFT tuning limit = 25 M */
+/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1
+#define DIV_QR_1_NORM_THRESHOLD              5
+#define DIV_QR_1_UNNORM_THRESHOLD            1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           20
+
+#define DIV_1_VS_MUL_1_PERCENT             190
+
+#define MUL_TOOM22_THRESHOLD                45
+#define MUL_TOOM33_THRESHOLD               129
+#define MUL_TOOM44_THRESHOLD               387
+#define MUL_TOOM6H_THRESHOLD               537
+#define MUL_TOOM8H_THRESHOLD               774
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     141
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     237
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     141
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     258
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     211
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 64
+#define SQR_TOOM3_THRESHOLD                189
+#define SQR_TOOM4_THRESHOLD                517
+#define SQR_TOOM6_THRESHOLD                656
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
+
+#define MULMID_TOOM42_THRESHOLD             62
+
+#define MULMOD_BNM1_THRESHOLD               23
+#define SQRMOD_BNM1_THRESHOLD               28
+
+#define MUL_FFT_MODF_THRESHOLD             630  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    630, 5}, {     29, 6}, {     15, 5}, {     33, 6}, \
+    {     17, 5}, {     35, 6}, {     36, 7}, {     19, 6}, \
+    {     40, 7}, {     21, 6}, {     43, 7}, {     23, 6}, \
+    {     47, 7}, {     25, 6}, {     51, 7}, {     27, 6}, \
+    {     55, 7}, {     29, 8}, {     15, 7}, {     37, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 7}, {     57, 9}, {     15, 8}, {     31, 7}, \
+    {     65, 8}, {     35, 7}, {     71, 8}, {     43, 9}, \
+    {     23, 8}, {     55, 9}, {     31, 8}, {     71, 9}, \
+    {     39, 8}, {     83, 9}, {     47, 8}, {     99, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {    103,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    167,10}, {     95, 9}, {    191,10}, \
+    {    111,11}, {     63,10}, {    159,11}, {     95,10}, \
+    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399, 9}, {    799,10}, {    415,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    511, 9}, {   1023,10}, \
+    {    543,11}, {    287,10}, {    607,11}, {    319,10}, \
+    {    671,11}, {    351,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,11}, {    607,12}, {    319,11}, {    735,12}, \
+    {    383,11}, {    831,12}, {    447,11}, {    927,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1343,12}, {    703,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
+    {   1663,12}, {    895,14}, {    255,13}, {    511,12}, \
+    {   1023,11}, {   2047,12}, {   1151,13}, {    639,12}, \
+    {   1407,13}, {    767,12}, {   1663,13}, {    895,12}, \
+    {   1791,14}, {    511,13}, {   1023,12}, {   2111,13}, \
+    {   1151,12}, {   2431,13}, {   1279,12}, {   2559,13}, \
+    {   1407,14}, {    767,13}, {   1535,12}, {   3071,13}, \
+    {   1663,12}, {   3455,13}, {   1791,15}, {    511,14}, \
+    {   1023,13}, {   2047,12}, {   4095,13}, {   2175,12}, \
+    {   4479,13}, {   2431,14}, {   1279,13}, {   2559,12}, \
+    {   5119,13}, {   2815,12}, {   5631,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 157
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             565  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    565, 5}, {     19, 4}, {     40, 5}, {     21, 4}, \
+    {     43, 5}, {     28, 6}, {     15, 5}, {     35, 6}, \
+    {     29, 7}, {     15, 6}, {     37, 7}, {     19, 6}, \
+    {     39, 7}, {     21, 6}, {     43, 7}, {     23, 6}, \
+    {     47, 7}, {     29, 8}, {     15, 7}, {     37, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 7}, {     55, 9}, {     15, 8}, {     31, 7}, \
+    {     65, 8}, {     35, 7}, {     71, 8}, {     43, 9}, \
+    {     23, 8}, {     55,10}, {     15, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511, 8}, {   1023, 9}, \
+    {    527,10}, {    271, 9}, {    543,10}, {    287,11}, \
+    {    159, 9}, {    639,10}, {    335, 9}, {    671,10}, \
+    {    351,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399, 9}, {    799,10}, {    415,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    511, 9}, {   1023,10}, \
+    {    543,11}, {    287,10}, {    671,11}, {    351,12}, \
+    {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
+    {    831,13}, {    127,12}, {    255,11}, {    511,10}, \
+    {   1023,11}, {    543,10}, {   1087,11}, {    735,12}, \
+    {    383,11}, {    831,12}, {    447,11}, {    927,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1151,12}, {    639,11}, {   1343,12}, {    703,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
+    {   1663,12}, {    959,13}, {    511,12}, {   1023,11}, \
+    {   2047,12}, {   1151,13}, {    639,12}, {   1407,13}, \
+    {    767,12}, {   1599,13}, {    895,12}, {   1791,14}, \
+    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
+    {   2431,13}, {   1279,12}, {   2559,13}, {   1407,14}, \
+    {    767,13}, {   1535,12}, {   3071,13}, {   1663,12}, \
+    {   3455,13}, {   1791,15}, {    511,14}, {   1023,13}, \
+    {   2047,12}, {   4095,13}, {   2175,12}, {   4479,13}, \
+    {   2303,14}, {   1279,13}, {   2559,12}, {   5119,13}, \
+    {   2815,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 155
+#define SQR_FFT_THRESHOLD                 5568
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  37
+#define MULLO_MUL_N_THRESHOLD            13463
+#define SQRLO_BASECASE_THRESHOLD            12
+#define SQRLO_DC_THRESHOLD                  22
+#define SQRLO_SQR_THRESHOLD              10950
+
+#define DC_DIV_QR_THRESHOLD                 32
+#define DC_DIVAPPR_Q_THRESHOLD              99
+#define DC_BDIV_QR_THRESHOLD                43
+#define DC_BDIV_Q_THRESHOLD                102
+
+#define INV_MULMOD_BNM1_THRESHOLD           88
+#define INV_NEWTON_THRESHOLD               141
+#define INV_APPR_THRESHOLD                 111
+
+#define BINV_NEWTON_THRESHOLD              312
+#define REDC_1_TO_REDC_2_THRESHOLD           6
+#define REDC_2_TO_REDC_N_THRESHOLD         140
+
+#define MU_DIV_QR_THRESHOLD               2492
+#define MU_DIVAPPR_Q_THRESHOLD            2130
+#define MUPI_DIV_QR_THRESHOLD               55
+#define MU_BDIV_QR_THRESHOLD              2130
+#define MU_BDIV_Q_THRESHOLD               2172
+
+#define POWM_SEC_TABLE  40,53,56,71,1985
+
+#define GET_STR_DC_THRESHOLD                16
+#define GET_STR_PRECOMPUTE_THRESHOLD        33
+#define SET_STR_DC_THRESHOLD               172
+#define SET_STR_PRECOMPUTE_THRESHOLD       671
+
+#define FAC_DSC_THRESHOLD                  309
+#define FAC_ODD_THRESHOLD                   29
+
+#define MATRIX22_STRASSEN_THRESHOLD         24
+#define HGCD_THRESHOLD                      61
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             4120
+#define GCD_DC_THRESHOLD                   408
+#define GCDEXT_DC_THRESHOLD                303
+#define JACOBI_BASE_METHOD                   4
commit	dace2a60b36477ef4295dea0cd01ae43a65f986f	[log] [tgz]
author	Austin Schuh <austin.linux@gmail.com>	Tue Aug 18 10:56:48 2020 -0700
committer	Austin Schuh <austin.linux@gmail.com>	Wed Aug 19 16:14:33 2020 -0700
tree	f6c709631ecad56062e08308ae06dd1425613c36
parent	1b6c20f6a7928823e49afda98bf720d99c5689ba [diff]