blob: 6e1e21455894351ae2722ec9b59da68dc7f08f4b [file] [log] [blame]
1dnl Alpha mpn_rshift -- Shift a number right.
2
3dnl Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
4
5dnl This file is part of the GNU MP Library.
6dnl
7dnl The GNU MP Library is free software; you can redistribute it and/or modify
8dnl it under the terms of either:
9dnl
10dnl * the GNU Lesser General Public License as published by the Free
11dnl Software Foundation; either version 3 of the License, or (at your
12dnl option) any later version.
13dnl
14dnl or
15dnl
16dnl * the GNU General Public License as published by the Free Software
17dnl Foundation; either version 2 of the License, or (at your option) any
18dnl later version.
19dnl
20dnl or both in parallel, as here.
21dnl
22dnl The GNU MP Library is distributed in the hope that it will be useful, but
23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25dnl for more details.
26dnl
27dnl You should have received copies of the GNU General Public License and the
28dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29dnl see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C cycles/limb
34C EV4: ?
35C EV5: 3.25
36C EV6: 1.75
37
38C INPUT PARAMETERS
39C rp r16
40C up r17
41C n r18
42C cnt r19
43
44
45ASM_START()
46PROLOGUE(mpn_rshift)
C Shift the n-limb number at up right by cnt bits and store the n result
C limbs at rp, working from the lowest address upwards.  Each result limb is
C (up[i] >> cnt) | (up[i+1] << -cnt); the top result limb is up[n-1] >> cnt.
C The bits shifted out of up[0] are left in r0 as the function result.
C
C Register roles:
C   r20   0 - cnt, the left-shift count applied to the next higher limb
C         (relies on Alpha shifts using only the low 6 bits of the count)
C   r28   number of limbs done one at a time in L(top0), (n-1) mod 4
C   r18   limbs left for the four-way unrolled code, a multiple of 4
C   r24   current limb >> cnt, waiting to be combined with the next limb
C         or stored alone as the top result limb
C   r1-r4           limbs loaded ahead of use
C   r5-r8, r21-r23  partial and combined results in flight
47 ldq r4,0(r17) C load first limb
48 subq r31,r19,r20 C r20 = 0 - cnt (r31 reads as zero)
49 subq r18,1,r18 C r18 = n - 1, the number of limb pairs to combine
50 and r18,4-1,r28 C r28 = (n-1) mod 4 = number of limbs in first loop
51 sll r4,r20,r0 C compute function result: bits shifted out of up[0]
52
53 beq r28,L(L0) C nothing to do one limb at a time
54 subq r18,r28,r18 C r18 = limbs left for the unrolled code, multiple of 4
55
56 ALIGN(8)
57L(top0):
58 ldq r3,8(r17) C r3 = next higher source limb
59 addq r16,8,r16 C advance rp early; the store below uses offset -8
60 srl r4,r19,r5 C r5 = current limb >> cnt
61 addq r17,8,r17 C advance up
62 subq r28,1,r28 C one fewer limb for this loop
63 sll r3,r20,r6 C r6 = low cnt bits of next limb, moved to the top
64 bis r3,r3,r4 C r4 = r3, next limb becomes the current limb
65 bis r5,r6,r8 C r8 = combined result limb
66 stq r8,-8(r16) C store at the rp position from before the advance
67 bne r28,L(top0)
68
69L(L0): srl r4,r19,r24 C r24 = current limb >> cnt, pending high-limb part
70 beq r18,L(end) C no limbs left: r24 is the top result limb
71C warm up phase 1: load the next four source limbs into r1-r4
72 ldq r1,8(r17)
73 subq r18,4,r18 C account for the four limbs just being loaded
74 ldq r2,16(r17)
75 ldq r3,24(r17)
76 ldq r4,32(r17)
77C warm up phase 2: start shifting the first group, load the second group
78 sll r1,r20,r7
79 srl r1,r19,r21
80 sll r2,r20,r8
81 beq r18,L(end1) C only these four limbs remain
82 ldq r1,40(r17)
83 srl r2,r19,r22 C uses the old r2, before it is reloaded below
84 ldq r2,48(r17)
85 sll r3,r20,r5
86 bis r7,r24,r7 C first result of the group: pending r24 | r1 part
87 srl r3,r19,r23
88 bis r8,r21,r8
89 sll r4,r20,r6
90 ldq r3,56(r17)
91 srl r4,r19,r24 C new pending part for the following group
92 ldq r4,64(r17)
93 subq r18,4,r18 C account for the second group of four
94 beq r18,L(end2) C exactly two groups remain: skip the main loop
95 ALIGN(16)
96C main loop: per pass store four result limbs, shift the four limbs already
C in r1-r4, and load the four limbs after them
97L(top): stq r7,0(r16)
98 bis r5,r22,r5
99 stq r8,8(r16)
100 bis r6,r23,r6
101
102 sll r1,r20,r7
103 subq r18,4,r18 C four more limbs consumed; loop ends when r18 is 0
104 srl r1,r19,r21
105 unop C ldq r31,-96(r17)
106
107 sll r2,r20,r8
108 ldq r1,72(r17)
109 srl r2,r19,r22
110 ldq r2,80(r17)
111
112 stq r5,16(r16)
113 bis r7,r24,r7
114 stq r6,24(r16)
115 bis r8,r21,r8
116
117 sll r3,r20,r5
118 unop C ldq r31,-96(r17)
119 srl r3,r19,r23
120 addq r16,32,r16 C advance rp by four limbs
121
122 sll r4,r20,r6
123 ldq r3,88(r17)
124 srl r4,r19,r24
125 ldq r4,96(r17)
126
127 addq r17,32,r17 C advance up by four limbs
128 bne r18,L(top)
129C cool down phase 2/1: store the group in flight, shift the last loaded group
130L(end2):
131 stq r7,0(r16)
132 bis r5,r22,r5
133 stq r8,8(r16)
134 bis r6,r23,r6
135 sll r1,r20,r7
136 srl r1,r19,r21
137 sll r2,r20,r8
138 srl r2,r19,r22
139 stq r5,16(r16)
140 bis r7,r24,r7
141 stq r6,24(r16)
142 bis r8,r21,r8
143 sll r3,r20,r5
144 srl r3,r19,r23
145 sll r4,r20,r6
146 srl r4,r19,r24 C r24 = last source limb >> cnt
147C cool down phase 2/2: store the last four combined limbs
148 stq r7,32(r16)
149 bis r5,r22,r5
150 stq r8,40(r16)
151 bis r6,r23,r6
152 stq r5,48(r16)
153 stq r6,56(r16)
154C cool down phase 2/3: store the top result limb
155 stq r24,64(r16)
156 ret r31,(r26),1 C return to the address in r26
157
158C cool down phase 1/1: finish shifting the only remaining group of four
159L(end1):
160 srl r2,r19,r22
161 sll r3,r20,r5
162 bis r7,r24,r7
163 srl r3,r19,r23
164 bis r8,r21,r8
165 sll r4,r20,r6
166 srl r4,r19,r24 C r24 = last source limb >> cnt
167C cool down phase 1/2: store four combined limbs, then the top result limb
168 stq r7,0(r16)
169 bis r5,r22,r5
170 stq r8,8(r16)
171 bis r6,r23,r6
172 stq r5,16(r16)
173 stq r6,24(r16)
174 stq r24,32(r16)
175 ret r31,(r26),1 C return to the address in r26
176
177L(end): stq r24,0(r16) C top result limb is all that is left to store
178 ret r31,(r26),1 C return to the address in r26
179EPILOGUE(mpn_rshift)
180ASM_END()