blob: cfc242ea9ca274248c375a3444bc8014478a88d3 [file] [log] [blame]
1dnl HP-PA 2.0 mpn_rshift -- Right shift.
2
3dnl Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
4
5dnl This file is part of the GNU MP Library.
6dnl
7dnl The GNU MP Library is free software; you can redistribute it and/or modify
8dnl it under the terms of either:
9dnl
10dnl * the GNU Lesser General Public License as published by the Free
11dnl Software Foundation; either version 3 of the License, or (at your
12dnl option) any later version.
13dnl
14dnl or
15dnl
16dnl * the GNU General Public License as published by the Free Software
17dnl Foundation; either version 2 of the License, or (at your option) any
18dnl later version.
19dnl
20dnl or both in parallel, as here.
21dnl
22dnl The GNU MP Library is distributed in the hope that it will be useful, but
23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25dnl for more details.
26dnl
27dnl You should have received copies of the GNU General Public License and the
28dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29dnl see https://www.gnu.org/licenses/.
30
31
32dnl This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
33
34include(`../config.m4')
35
36dnl INPUT PARAMETERS
dnl Symbolic names for the argument registers used by the routine below:
dnl   rp  (%r26)  base address the result limbs are stored to
dnl   up  (%r25)  base address the source limbs are loaded from
dnl   n   (%r24)  limb count; counted down by the routine as it runs
dnl   cnt (%r23)  shift count; the routine moves it into %sar
37define(`rp',`%r26')
38define(`up',`%r25')
39define(`n',`%r24')
40define(`cnt',`%r23')
41
dnl Select the assembler architecture level: 2.0w when HAVE_ABI_2_0w is
dnl defined, plain 2.0 otherwise.
42ifdef(`HAVE_ABI_2_0w',
43` .level 2.0w
44',` .level 2.0
45')
dnl mpn_rshift -- shift an n-limb operand right by cnt bits.
dnl
dnl Loads 64-bit limbs from up[0..n-1], with up[0] the least significant,
dnl and stores the shifted limbs to rp[0..n-1].  The bits shifted out of
dnl up[0] are collected at the top of a limb in %r29 and handed back through
dnl %r28 by the final instruction (see the return sequence).
dnl
dnl NOTE(review): the code only special-cases n = 1; n = 0 is not handled.
dnl NOTE(review): presumably 1 <= cnt <= 63 is required -- confirm against
dnl the documented mpn_rshift contract.
dnl
dnl Register use inside the routine:
dnl   %r20, %r21  source limbs; they alternate as next and previous limb
dnl   %r22        byte offset applied to up and rp before entering the loop
dnl   %r28        jump table index, later the return value
dnl   %r29        carry-out limb
dnl
dnl shrpd a, b, %sar, t sets t to the low 64 bits of the 128-bit value a:b
dnl shifted right by the count held in %sar.
46PROLOGUE(mpn_rshift)
C Put the shift count in %sar for the shrpd instructions and fetch up[0].
47 mtsar cnt
48 ldd 0(up), %r21
C n = n - 1; if that leaves zero (single-limb operand) go straight to the
C final store.  The shrpd that follows sits in the delay slot of the branch,
C so the carry-out limb is computed on both paths.
49 addib,= -1, n, L(end)
50 shrpd %r21, %r0, %sar, %r29 C compute carry out limb
C Dispatch into the 8-way unrolled loop.  n now counts the load/shift/store
C steps still needed before the final store.  Entry point k = n & 7 makes
C the first pass run only k steps (all 8 when k is 0).  up and rp are moved
C back by 8 * (-n & 7) bytes so that the fixed displacements used at that
C entry point address up[1] and rp[0].
51 depw,z n, 31, 3, %r28 C r28 = (size & 7)
52 sub %r0, n, %r22
53 depw,z %r22, 28, 3, %r22 C r22 = 8 * (-size & 7)
54 sub up, %r22, up C offset up
C blr branches to the %r28-th two-instruction slot of the table below and
C discards the link value in %r0.  The sub after it is in the delay slot.
55 blr %r28, %r0 C branch into jump table
56 sub rp, %r22, rp C offset rp
C Jump table: one branch plus one delay-slot instruction per entry.  Entry
C points 1, 3, 5 and 7 expect the previously loaded limb in %r20, so their
C delay slots copy up[0] over from %r21; the even entry points use %r21
C directly and need only a nop.
57 b L(0)
58 nop
59 b L(1)
60 copy %r21, %r20
61 b L(2)
62 nop
63 b L(3)
64 copy %r21, %r20
65 b L(4)
66 nop
67 b L(5)
68 copy %r21, %r20
69 b L(6)
70 nop
71 b L(7)
72 copy %r21, %r20
73
C Main loop, unrolled 8 times.  Each step loads the next source limb, forms
C the low 64 bits of (next:previous) >> cnt with shrpd, and stores that as
C one result limb.  %r20 and %r21 swap the next/previous roles from step to
C step, so no register copy is needed inside the loop.
74LDEF(loop)
75LDEF(0) ldd 8(up), %r20
76 shrpd %r20, %r21, %sar, %r21
77 std %r21, 0(rp)
78LDEF(7) ldd 16(up), %r21
79 shrpd %r21, %r20, %sar, %r20
80 std %r20, 8(rp)
81LDEF(6) ldd 24(up), %r20
82 shrpd %r20, %r21, %sar, %r21
83 std %r21, 16(rp)
84LDEF(5) ldd 32(up), %r21
85 shrpd %r21, %r20, %sar, %r20
86 std %r20, 24(rp)
87LDEF(4) ldd 40(up), %r20
88 shrpd %r20, %r21, %sar, %r21
89 std %r21, 32(rp)
90LDEF(3) ldd 48(up), %r21
91 shrpd %r21, %r20, %sar, %r20
92 std %r20, 40(rp)
93LDEF(2) ldd 56(up), %r20
94 shrpd %r20, %r21, %sar, %r21
95 std %r21, 48(rp)
C Last step of a pass: it also advances up by 64 bytes (8 limbs).  Then
C n = n - 8 and the loop repeats while n is still positive; rp is advanced
C by 64 bytes in the delay slot of that branch.
96LDEF(1) ldd 64(up), %r21
97 ldo 64(up), up
98 shrpd %r21, %r20, %sar, %r20
99 std %r20, 56(rp)
100 addib,> -8, n, L(loop)
101 ldo 64(rp), rp
102
C %r21 holds the most significant source limb here (up[0] itself when the
C operand is a single limb).  Shift zeros in from above and store the top
C result limb.
103LDEF(end)
104 shrpd %r0, %r21, %sar, %r21
105 std %r21, 0(rp)
C Return via the address in %r2.  The instruction selected by the ifdef
C below is in the delay slot of bve and moves the carry-out limb into %r28:
C a full 64-bit copy when HAVE_ABI_2_0w is defined, otherwise the upper 32
C bits of %r29, zero-extended (%r29 itself is left unchanged).
C NOTE(review): presumably the non-2.0w ABI returns the 64-bit limb as the
C register pair %r28:%r29 -- confirm against the ABI documentation.
106 bve (%r2)
107ifdef(`HAVE_ABI_2_0w',
108` copy %r29,%r28
109',` extrd,u %r29, 31, 32, %r28
110')
111EPILOGUE(mpn_rshift)