blob: c62a856aea3335bd8d67de9fc3f27dbc2a4cd994 [file] [log] [blame]
1dnl Alpha mpn_lshift -- Shift a number left.
2
3dnl Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
4
5dnl This file is part of the GNU MP Library.
6dnl
7dnl The GNU MP Library is free software; you can redistribute it and/or modify
8dnl it under the terms of either:
9dnl
10dnl * the GNU Lesser General Public License as published by the Free
11dnl Software Foundation; either version 3 of the License, or (at your
12dnl option) any later version.
13dnl
14dnl or
15dnl
16dnl * the GNU General Public License as published by the Free Software
17dnl Foundation; either version 2 of the License, or (at your option) any
18dnl later version.
19dnl
20dnl or both in parallel, as here.
21dnl
22dnl The GNU MP Library is distributed in the hope that it will be useful, but
23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25dnl for more details.
26dnl
27dnl You should have received copies of the GNU General Public License and the
28dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29dnl see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C cycles/limb
34C EV4: ?
35C EV5: 3.25
36C EV6: 1.75
37
38C INPUT PARAMETERS
39C rp r16
40C up r17
41C n r18
42C cnt r19
43
44
45ASM_START()
C mpn_lshift(rp, up, n, cnt)
C
C Shift the n-limb operand at up left by cnt bits and store n limbs at rp.
C r0 returns the bits shifted out of the most significant limb, placed in
C the low cnt bits.
C
C Limbs are processed from the most significant end downwards: both
C pointers are first moved just past the top limb and from then on are only
C decremented.  Each result limb is
C     (limb << cnt) | (next lower limb >> (64-cnt))
C and the lowest result limb is just (lowest limb << cnt).
C
C Structure:
C   1. L(top0) handles (n-1) mod 4 limbs, one per iteration.
C   2. The remaining limbs, a multiple of 4, go through the 4-way unrolled
C      loop L(top).  It is software pipelined: one group of four limbs is
C      being stored while the following group is shifted and the group after
C      that is loaded.  Warm-up code fills the pipeline and the cool-down
C      code at L(end1) and L(end2) drains it.
C   3. Every exit path ends by storing r24, the left-shifted part of the
C      lowest source limb, as the lowest result limb.
C
C Register use:
C   r16      store pointer into the result
C   r17      load pointer into the source
C   r18      limb counter, r28 = iteration counter of L(top0)
C   r19      cnt
C   r20      -cnt, the count used for all right shifts
C   r1-r4    source limbs of the current group
C   r21-r24  source limbs shifted left by cnt
C   r5-r8    source limbs shifted right, then the combined result limbs
C   r0       return value
C
C The right shifts by r20 = -cnt rely on the Alpha shift instructions using
C only the low 6 bits of the count, which makes them shifts by 64-cnt.
C
C NOTE(review): the code appears to assume 1 <= cnt <= 63 (with cnt = 0 the
C right shifts would leave limbs unshifted) and n >= 1 (the top limb is
C loaded unconditionally) -- confirm against the documented mpn_lshift
C contract.
C NOTE(review): the two unop lines in L(top) carry a commented-out ldq into
C r31; presumably a disabled prefetch kept as an issue-slot filler -- confirm.
46PROLOGUE(mpn_lshift)
47 s8addq r18,r17,r17 C r17 = up + 8*n, just past the top source limb
48 ldq r4,-8(r17) C r4 = most significant source limb
49 subq r31,r19,r20 C r20 = 0 - cnt, used as the right-shift count 64-cnt
50 s8addq r18,r16,r16 C r16 = rp + 8*n, just past the top result limb
51 subq r18,1,r18 C r18 = n-1, source limbs not yet loaded
52 and r18,4-1,r28 C r28 = (n-1) mod 4, number of limbs in first loop
53 srl r4,r20,r0 C function result: bits shifted out of the top limb
54
55 beq r28,L(L0) C first loop not needed when (n-1) mod 4 is 0
56 subq r18,r28,r18 C r18 = limbs left for the 4-way code, a multiple of 4
57
58 ALIGN(8)
59L(top0): C one result limb per iteration; r4 = limb above the one loaded
60 ldq r3,-16(r17) C r3 = next lower source limb
61 subq r16,8,r16 C step the store pointer down one limb
62 sll r4,r19,r5 C high part, taken from the upper limb
63 subq r17,8,r17 C step the load pointer down one limb
64 subq r28,1,r28 C one fewer limb for this loop
65 srl r3,r20,r6 C low part, taken from the lower limb
66 bis r3,r3,r4 C register move: lower limb is the upper limb next time
67 bis r5,r6,r8 C combine both parts into the result limb
68 stq r8,0(r16)
69 bne r28,L(top0)
70
71L(L0): sll r4,r19,r24 C r24 = pending high part of the next result limb
72 beq r18,L(end) C no source limbs left, only r24 remains to be stored
73C warm up phase 1: load the first group of four limbs
74 ldq r1,-16(r17)
75 subq r18,4,r18 C account for the four limbs loaded here
76 ldq r2,-24(r17)
77 ldq r3,-32(r17)
78 ldq r4,-40(r17)
79C warm up phase 2: start shifting the first group, load the second group
80 srl r1,r20,r7
81 sll r1,r19,r21
82 srl r2,r20,r8
83 beq r18,L(end1) C exactly four limbs were left, finish without the loop
84 ldq r1,-48(r17) C r1 is free again, its shifted parts are in r7 and r21
85 sll r2,r19,r22
86 ldq r2,-56(r17)
87 srl r3,r20,r5
88 bis r7,r24,r7 C first result of the group, uses the pending high part
89 sll r3,r19,r23
90 bis r8,r21,r8 C second result of the group
91 srl r4,r20,r6
92 ldq r3,-64(r17)
93 sll r4,r19,r24 C new pending high part for the following group
94 ldq r4,-72(r17)
95 subq r18,4,r18 C account for the second group of four limbs
96 beq r18,L(end2) C exactly eight limbs were left, skip the loop
97 ALIGN(16)
98C main loop: per iteration store four result limbs, shift the four limbs
C held in r1-r4, and load the next four limbs.  On entry r7 and r8 are
C complete, r5 and r6 still need their high parts from r22 and r23.
99L(top): stq r7,-8(r16)
100 bis r5,r22,r5 C complete the third result of the stored group
101 stq r8,-16(r16)
102 bis r6,r23,r6 C complete the fourth result of the stored group
103
104 srl r1,r20,r7 C r7 was stored above and is reused for the next group
105 subq r18,4,r18 C four more limbs are loaded in this iteration
106 sll r1,r19,r21
107 unop C ldq r31,-96(r17)
108
109 srl r2,r20,r8
110 ldq r1,-80(r17) C offsets continue from the warm-up loads, r17 moves below
111 sll r2,r19,r22
112 ldq r2,-88(r17)
113
114 stq r5,-24(r16)
115 bis r7,r24,r7 C first result of the next group, uses the pending high part
116 stq r6,-32(r16)
117 bis r8,r21,r8 C second result of the next group
118
119 srl r3,r20,r5
120 unop C ldq r31,-96(r17)
121 sll r3,r19,r23
122 subq r16,32,r16 C four limbs stored, step the store pointer down
123
124 srl r4,r20,r6
125 ldq r3,-96(r17)
126 sll r4,r19,r24 C new pending high part
127 ldq r4,-104(r17)
128
129 subq r17,32,r17 C four limbs loaded, step the load pointer down
130 bne r18,L(top)
131C cool down phase 2/1: store the finished group and shift the last loaded
C group in r1-r4; nothing more is loaded from here on
132L(end2):
133 stq r7,-8(r16)
134 bis r5,r22,r5
135 stq r8,-16(r16)
136 bis r6,r23,r6
137 srl r1,r20,r7
138 sll r1,r19,r21
139 srl r2,r20,r8
140 sll r2,r19,r22
141 stq r5,-24(r16)
142 bis r7,r24,r7
143 stq r6,-32(r16)
144 bis r8,r21,r8
145 srl r3,r20,r5
146 sll r3,r19,r23
147 srl r4,r20,r6
148 sll r4,r19,r24 C r24 = lowest source limb shifted left
149C cool down phase 2/2: store the results of the last group
150 stq r7,-40(r16)
151 bis r5,r22,r5
152 stq r8,-48(r16)
153 bis r6,r23,r6
154 stq r5,-56(r16)
155 stq r6,-64(r16)
156C cool down phase 2/3: lowest result limb has only the left-shifted part
157 stq r24,-72(r16)
158 ret r31,(r26),1 C return, r0 holds the shifted-out bits
159
160C cool down phase 1/1: only one group was loaded; finish shifting it
161L(end1):
162 sll r2,r19,r22
163 srl r3,r20,r5
164 bis r7,r24,r7 C first result, uses the pending high part from L(L0)
165 sll r3,r19,r23
166 bis r8,r21,r8
167 srl r4,r20,r6
168 sll r4,r19,r24 C r24 = lowest source limb shifted left
169C cool down phase 1/2: store the four results and the lowest result limb
170 stq r7,-8(r16)
171 bis r5,r22,r5
172 stq r8,-16(r16)
173 bis r6,r23,r6
174 stq r5,-24(r16)
175 stq r6,-32(r16)
176 stq r24,-40(r16)
177 ret r31,(r26),1 C return, r0 holds the shifted-out bits
178
179L(end): stq r24,-8(r16) C no 4-way work: store the lowest result limb
180 ret r31,(r26),1 C return, r0 holds the shifted-out bits
181EPILOGUE(mpn_lshift)
182ASM_END()