blob: bd5a71f3280b42752139db3660b36731825a929c [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
2
3dnl This file is part of the GNU MP Library.
4dnl
5dnl The GNU MP Library is free software; you can redistribute it and/or modify
6dnl it under the terms of either:
7dnl
8dnl * the GNU Lesser General Public License as published by the Free
9dnl Software Foundation; either version 3 of the License, or (at your
10dnl option) any later version.
11dnl
12dnl or
13dnl
14dnl * the GNU General Public License as published by the Free Software
15dnl Foundation; either version 2 of the License, or (at your option) any
16dnl later version.
17dnl
18dnl or both in parallel, as here.
19dnl
20dnl The GNU MP Library is distributed in the hope that it will be useful, but
21dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23dnl for more details.
24dnl
25dnl You should have received copies of the GNU General Public License and the
26dnl GNU Lesser General Public License along with the GNU MP Library. If not,
27dnl see https://www.gnu.org/licenses/.
28
29
30dnl Possible optimizations (none of these is done in the code below):
31dnl * Avoid skip instructions
32dnl * Put carry-generating and carry-consuming insns consecutively
33dnl * Don't allocate any stack, "home" positions for parameters could be used.
34
35include(`../config.m4')
36
dnl Register aliases for the integer part of the routine below.
dnl   p0, p1  hold the low*low and high*high partial products and are then
dnl           accumulated into the low and high 64 bits of the result
dnl   t32     holds the constant 2^32
dnl   t0, t1  hold the low and high 32-bit parts of x, positioned for adding
dnl   x       holds the sum of the two middle partial products
dnl   m0, m1  hold the two middle partial products
dnl m0 and m1 are %r23 and %r24, which the entry code also reads as inputs.
dnl All of those reads happen before m0 and m1 are loaded.
37define(`p0',`%r28')
38define(`p1',`%r29')
39define(`t32',`%r19')
40define(`t0',`%r20')
41define(`t1',`%r21')
42define(`x',`%r22')
43define(`m0',`%r23')
44define(`m1',`%r24')
45
dnl Select the assembler architecture level: 2.0w when HAVE_ABI_2_0w is
dnl defined, plain 2.0 otherwise.
46ifdef(`HAVE_ABI_2_0w',
47` .level 2.0w
48',` .level 2.0
49')
dnl mpn_umul_ppmm_r -- unsigned 64x64 -> 128 bit multiply.
dnl
dnl Inputs: two 64-bit operands u and v, and an address.
dnl   HAVE_ABI_2_0w: u in %r26, v in %r25, address in %r24.
dnl   otherwise:     u as 32-bit halves %r25 (high) and %r26 (low), v as
dnl                  halves %r23 (high) and %r24 (low), address loaded from
dnl                  memory at -52 relative to %r30 on entry (presumably the
dnl                  stack argument slot of this ABI -- confirm against the
dnl                  calling convention).
dnl Outputs: the low 64 bits of u*v are stored at the address.  The high
dnl   64 bits are returned in %r28 (2.0w), or split as %r28 = upper 32 bits
dnl   and %r29 = lower 32 bits (otherwise).
dnl
dnl Method: write u = uH*2^32 + uL and v = vH*2^32 + vL.  The four 32x32
dnl products are formed with xmpyu in the floating point registers and
dnl passed to the general registers through a 128-byte scratch area:
dnl   p0 = uL*vL   m0 = uH*vL   m1 = uL*vH   p1 = uH*vH
dnl Then x = m0+m1 is split at 32 bits and added into p0 and p1, with the
dnl carry out of m0+m1 (worth 2^32 in p1) and the carry out of p0 both
dnl propagated into p1.
50PROLOGUE(mpn_umul_ppmm_r)
51 ldo 128(%r30),%r30 C reserve 128 bytes of scratch, addressed at negative offsets from %r30
52ifdef(`HAVE_ABI_2_0w',
53` std %r26,-64(%r30) C spill u
54 std %r25,-56(%r30) C spill v
55 copy %r24,%r31 C keep the result address in %r31 since %r24 is reused as m1
56',`
57 depd %r25,31,32,%r26 C upper half of %r26 = low 32 bits of %r25 giving the full u
58 std %r26,-64(%r30) C spill u
59 depd %r23,31,32,%r24 C upper half of %r24 = low 32 bits of %r23 giving the full v
60 std %r24,-56(%r30) C spill v
61 ldw -180(%r30),%r31 C result address from -52 relative to %r30 on entry
62')
63
dnl Reload the operands into floating point registers, where xmpyu operates.
64 fldd -64(%r30),%fr4 C fr4 = u with L = upper 32 bits and R = lower 32 bits
65 fldd -56(%r30),%fr5 C fr5 = v
66
dnl Four 32x32 -> 64 bit partial products, each stored back to scratch.
67 xmpyu %fr5R,%fr4R,%fr6 C vL * uL
68 fstd %fr6,-128(%r30)
69 xmpyu %fr5R,%fr4L,%fr7 C vL * uH
70 fstd %fr7,-120(%r30)
71 xmpyu %fr5L,%fr4R,%fr8 C vH * uL
72 fstd %fr8,-112(%r30)
73 xmpyu %fr5L,%fr4L,%fr9 C vH * uH
74 fstd %fr9,-104(%r30)
75
76 depdi,z 1,31,1,t32 C t32 = 2^32, the weight in p1 of a carry out of m0+m1
77
78 ldd -128(%r30),p0 C p0 = uL*vL, the low partial product
79 ldd -120(%r30),m0 C m0 = uH*vL, first middle partial product
80 ldd -112(%r30),m1 C m1 = uL*vH, second middle partial product
81 ldd -104(%r30),p1 C p1 = uH*vH, the high partial product
82
dnl The *nuv condition nullifies the following instruction when the 64-bit
dnl sum m0+m1 has no unsigned overflow, so p1 is bumped only on a carry out.
83 add,l,*nuv m0,m1,x C x = m1+m0, skip next insn unless the sum carried out
84 add,l t32,p1,p1 C carry out of m0+m1 is worth 2^32 in p1
85 depd,z x,31,32,t0 C t0 = lo32(x) shifted into the upper half
86 add t0,p0,p0 C p0 += t0, the carry out is consumed by the add,dc below
87 extrd,u x,31,32,t1 C t1 = hi32(x) in the lower half
88 add,dc t1,p1,p1 C p1 += t1 + carry from the p0 addition
89
90 std p0,0(%r31) C store low 64 bits of product at the result address
91ifdef(`HAVE_ABI_2_0w',
92` copy p1,%r28 C return the high 64 bits in %r28
93',` extrd,u p1,31,32,%r28 C %r28 = upper 32 bits of p1 and %r29 is p1 itself for the lower 32
94')
95 bve (%r2) C return through the address in %r2
96 ldo -128(%r30),%r30 C branch delay slot releases the 128 bytes of scratch
97EPILOGUE(mpn_umul_ppmm_r)