dnl PowerPC-32 mpn_rshift -- Shift a number right.

dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of either:
dnl
dnl * the GNU Lesser General Public License as published by the Free
dnl Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl or
dnl
dnl * the GNU General Public License as published by the Free Software
dnl Foundation; either version 2 of the License, or (at your option) any
dnl later version.
dnl
dnl or both in parallel, as here.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl for more details.
dnl
dnl You should have received copies of the GNU General Public License and the
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
dnl see https://www.gnu.org/licenses/.

include(`../config.m4')

C mpn_rshift -- shift the n-limb number at up right by cnt bits and store the
C n-limb result at rp.  Limbs are processed from up[0] upwards.
C
C Return value (r3): up[0] << (32-cnt), i.e. the bits shifted out of the low
C limb, left-justified in a limb.
C
C Assumptions, not checked by this code:
C   * n >= 1: up[0] is loaded unconditionally and the small-n path counts
C     limbs with CTR, which would wrap for n = 0.
C   * presumably 1 <= cnt <= 31 (shift counts of a 32-bit limb) -- TODO
C     confirm against the mpn_rshift contract.
C
C Registers written: r0, r3-r5, r7-r12, CTR, cr0.  The n > 30 path also uses
C r24-r31, which it saves in a 48-byte stack frame and restores before
C returning.

C			cycles/limb
C 603e:			    ?
C 604e:			   3.0
C 75x (G3):		   3.0
C 7400,7410 (G4):	   3.0
C 7445,7455 (G4+):	   2.5
C 7447,7457 (G4+):	   2.25
C power4/ppc970:	   2.5
C power5:		   2.5

C INPUT PARAMETERS
C rp	r3
C up	r4
C n	r5
C cnt	r6

ASM_START()
PROLOGUE(mpn_rshift)
	cmpwi	cr0, r5, 30	C more than 30 limbs?
	addi	r7, r3, -4	C r7 = rp-4, so every store can use offset 4
	bgt	L(BIG)		C branch if more than 30 limbs

C Small operands (n <= 30): plain loop handling two limbs per iteration.
C r11 and r10 alternate as the limb whose low part is still to be stored.
	mtctr	r5		C copy size into CTR
	subfic	r8, r6, 32	C r8 = 32-cnt, the complementary left shift
	lwz	r11, 0(r4)	C load first up limb
	slw	r3, r11, r8	C compute function return value
	bdz	L(end1)		C n = 1: only the top limb remains

L(oop):	lwzu	r10, 4(r4)	C fetch next limb
	srw	r9, r11, r6	C low part comes from the previous limb
	slw	r12, r10, r8	C high part comes from the next limb
	or	r9, r9, r12
	stwu	r9, 4(r7)
	bdz	L(end2)		C done loading, top limb is in r10
	lwzu	r11, 4(r4)
	srw	r9, r10, r6
	slw	r12, r11, r8
	or	r9, r9, r12
	stwu	r9, 4(r7)
	bdnz	L(oop)		C falls through with the top limb in r11

L(end1):
	srw	r0, r11, r6	C top result limb, zeros shifted in
	stw	r0, 4(r7)
	blr
L(end2):
	srw	r0, r10, r6	C top result limb, zeros shifted in
	stw	r0, 4(r7)
	blr

C Large operands (n > 30).  r0 carries the already right-shifted part of the
C previous limb from one step to the next.
L(BIG):
	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
	stmw	r24, 8(r1)	C save registers we are supposed to preserve
	lwz	r9, 0(r4)
	subfic	r8, r6, 32	C r8 = 32-cnt, the complementary left shift
	slw	r3, r9, r8	C compute function return value
	srw	r0, r9, r6
	addi	r5, r5, -1	C r5 = n-1 limbs still to load

	andi.	r10, r5, 3	C count for spill loop, (n-1) mod 4
	beq	L(e)
	mtctr	r10
	lwzu	r28, 4(r4)
	bdz	L(xe0)

L(loop0):
	srw	r12, r28, r6
	slw	r24, r28, r8
	lwzu	r28, 4(r4)
	or	r24, r0, r24
	stwu	r24, 4(r7)
	mr	r0, r12
	bdnz	L(loop0)	C taken at most once!

L(xe0):	srw	r12, r28, r6
	slw	r24, r28, r8
	or	r24, r0, r24
	stwu	r24, 4(r7)
	mr	r0, r12

C From here the number of limbs still to load is a multiple of 4.  Since
C n > 30, (n-1)/4 >= 7, so CTR is at least 6 and the unrolled loop always
C runs at least once.
L(e):	srwi	r5, r5, 2	C count for unrolled loop
	addi	r5, r5, -1	C last group of 4 is finished after the loop
	mtctr	r5
	lwz	r28, 4(r4)
	lwz	r29, 8(r4)
	lwz	r30, 12(r4)
	lwzu	r31, 16(r4)

C Each iteration shifts the 4 limbs held in r28-r31 while loading the next 4.
L(loopU):
	srw	r9, r28, r6
	slw	r24, r28, r8
	lwz	r28, 4(r4)
	srw	r10, r29, r6
	slw	r25, r29, r8
	lwz	r29, 8(r4)
	srw	r11, r30, r6
	slw	r26, r30, r8
	lwz	r30, 12(r4)
	srw	r12, r31, r6
	slw	r27, r31, r8
	lwzu	r31, 16(r4)
	or	r24, r0, r24
	stw	r24, 4(r7)
	or	r25, r9, r25
	stw	r25, 8(r7)
	or	r26, r10, r26
	stw	r26, 12(r7)
	or	r27, r11, r27
	stwu	r27, 16(r7)
	mr	r0, r12
	bdnz	L(loopU)

C Wind down: shift and store the last 4 loaded limbs without loading more.
	srw	r9, r28, r6
	slw	r24, r28, r8
	srw	r10, r29, r6
	slw	r25, r29, r8
	srw	r11, r30, r6
	slw	r26, r30, r8
	srw	r12, r31, r6
	slw	r27, r31, r8
	or	r24, r0, r24
	stw	r24, 4(r7)
	or	r25, r9, r25
	stw	r25, 8(r7)
	or	r26, r10, r26
	stw	r26, 12(r7)
	or	r27, r11, r27
	stw	r27, 16(r7)

	stw	r12, 20(r7)	C top result limb, zeros shifted in
	lmw	r24, 8(r1)	C restore registers
	addi	r1, r1, 48	C release the stack frame
	blr
EPILOGUE()