Austin Schuh | bb1338c | 2024-06-15 19:31:16 -0700 | [diff] [blame] | 1 | dnl PowerPC-32 mpn_rshift -- Shift a number right. |
| 2 | |
| 3 | dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc. |
| 4 | |
| 5 | dnl This file is part of the GNU MP Library. |
| 6 | dnl |
| 7 | dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | dnl it under the terms of either: |
| 9 | dnl |
| 10 | dnl * the GNU Lesser General Public License as published by the Free |
| 11 | dnl Software Foundation; either version 3 of the License, or (at your |
| 12 | dnl option) any later version. |
| 13 | dnl |
| 14 | dnl or |
| 15 | dnl |
| 16 | dnl * the GNU General Public License as published by the Free Software |
| 17 | dnl Foundation; either version 2 of the License, or (at your option) any |
| 18 | dnl later version. |
| 19 | dnl |
| 20 | dnl or both in parallel, as here. |
| 21 | dnl |
| 22 | dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | dnl for more details. |
| 26 | dnl |
| 27 | dnl You should have received copies of the GNU General Public License and the |
| 28 | dnl GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | dnl see https://www.gnu.org/licenses/. |
| 30 | |
| 31 | include(`../config.m4') |
| 32 | |
| 33 | C cycles/limb |
| 34 | C 603e: ? |
| 35 | C 604e: 3.0 |
| 36 | C 75x (G3): 3.0 |
| 37 | C 7400,7410 (G4): 3.0 |
| 38 | C 7445,7455 (G4+): 2.5 |
| 39 | C 7447,7457 (G4+): 2.25 |
| 40 | C power4/ppc970: 2.5 |
| 41 | C power5: 2.5 |
| 42 | |
| 43 | C INPUT PARAMETERS |
| 44 | C rp r3 |
| 45 | C up r4 |
| 46 | C n r5 |
| 47 | C cnt r6 |
| 48 | |
| 49 | ASM_START() |
C mpn_rshift: shift the n-limb number at up right by cnt bits and store the
C n-limb result at rp.  Limbs are read and written from the lowest address
C upwards:
C   rp[i]   = (up[i] >> cnt) OR (up[i+1] << (32-cnt))     for i < n-1
C   rp[n-1] =  up[n-1] >> cnt
C The value left in r3 on return is up[0] << (32-cnt), that is, the bits
C shifted out at the low end.  up[0] is loaded unconditionally, so n is
C taken to be at least 1.
C NOTE(review): cnt is presumably limited to 1..31 by the caller contract;
C confirm against the mpn_rshift documentation.
C
C Register use common to both code paths:
C   r4  source pointer, advanced by the lwzu loads
C   r6  cnt
C   r7  destination pointer biased by -4, so stores use offsets 4 and up
C   r8  32 - cnt
| 50 | PROLOGUE(mpn_rshift) |
| 51 | cmpwi cr0, r5, 30 C more than 30 limbs? |
| 52 | addi r7, r3, -4 C dst-4 |
| 53 | bgt L(BIG) C branch if more than 30 limbs |
| 54 | |
C Small case, n <= 30.  CTR counts the limbs.  The bdz below leaves for
C end1 straight away when n is 1.
| 55 | mtctr r5 C copy size into CTR |
| 56 | subfic r8, r6, 32 |
| 57 | lwz r11, 0(r4) C load first source limb, up[0] |
| 58 | slw r3, r11, r8 C compute function return value |
| 59 | bdz L(end1) |
| 60 | |
C Two limbs per iteration.  r10 and r11 alternate as previous and next limb,
C so no register copy is needed.  Each half loads the next limb, ORs
C previous >> cnt with next << (32-cnt) and stores one result limb.
C The bdz in the middle exits to end2 when the limb in r10 was the last one.
| 61 | L(oop): lwzu r10, 4(r4) |
| 62 | srw r9, r11, r6 |
| 63 | slw r12, r10, r8 |
| 64 | or r9, r9, r12 |
| 65 | stwu r9, 4(r7) |
| 66 | bdz L(end2) |
| 67 | lwzu r11, 4(r4) |
| 68 | srw r9, r10, r6 |
| 69 | slw r12, r11, r8 |
| 70 | or r9, r9, r12 |
| 71 | stwu r9, 4(r7) |
| 72 | bdnz L(oop) |
| 73 | |
C Store the most significant result limb, last source limb >> cnt.
C end1 is used when that limb is in r11, end2 when it is in r10.
| 74 | L(end1): |
| 75 | srw r0, r11, r6 |
| 76 | stw r0, 4(r7) |
| 77 | blr |
| 78 | L(end2): |
| 79 | srw r0, r10, r6 |
| 80 | stw r0, 4(r7) |
| 81 | blr |
| 82 | |
C Large case, n > 30: four limbs per iteration, using r24-r31 as additional
C scratch registers.  A 48-byte stack frame is created and r24-r31 are saved
C at offset 8 in it.  r0 carries the pending low part, previous limb >> cnt,
C from one limb to the next.  r5 becomes n-1, the number of limbs still to
C be loaded after up[0].
| 83 | L(BIG): |
| 84 | stwu r1, -48(r1) |
| 85 | stmw r24, 8(r1) C save registers we are supposed to preserve |
| 86 | lwz r9, 0(r4) |
| 87 | subfic r8, r6, 32 |
| 88 | slw r3, r9, r8 C compute function return value |
| 89 | srw r0, r9, r6 |
| 90 | addi r5, r5, -1 |
| 91 | |
C Process (n-1) mod 4 limbs one at a time first, so that the number of limbs
C left for the unrolled loop is a multiple of 4.  Skipped when that count
C is zero.
| 92 | andi. r10, r5, 3 C count for spill loop |
| 93 | beq L(e) |
| 94 | mtctr r10 |
| 95 | lwzu r28, 4(r4) |
| 96 | bdz L(xe0) |
| 97 | |
C r28 holds the limb just loaded.  Each pass completes one result limb from
C r0 and r28 << (32-cnt), loads the following limb, and moves r28 >> cnt
C into r0 for the next result limb.
| 98 | L(loop0): |
| 99 | srw r12, r28, r6 |
| 100 | slw r24, r28, r8 |
| 101 | lwzu r28, 4(r4) |
| 102 | or r24, r0, r24 |
| 103 | stwu r24, 4(r7) |
| 104 | mr r0, r12 |
| 105 | bdnz L(loop0) C taken at most once! |
| 106 | |
C Last of the leftover limbs: same computation, but no further load.
| 107 | L(xe0): srw r12, r28, r6 |
| 108 | slw r24, r28, r8 |
| 109 | or r24, r0, r24 |
| 110 | stwu r24, 4(r7) |
| 111 | mr r0, r12 |
| 112 | |
C (n-1)/4 groups of four limbs remain.  One group is completed by the
C straight-line code after the loop, so CTR is set to groups - 1; with n > 30
C this is at least 6.  The first group is preloaded into r28-r31 and r4 is
C advanced by 16.
| 113 | L(e): srwi r5, r5, 2 C count for unrolled loop |
| 114 | addi r5, r5, -1 |
| 115 | mtctr r5 |
| 116 | lwz r28, 4(r4) |
| 117 | lwz r29, 8(r4) |
| 118 | lwz r30, 12(r4) |
| 119 | lwzu r31, 16(r4) |
| 120 | |
C Loop body: compute both shifted parts of the four limbs in r28-r31 while
C reloading r28-r31 with the next group, then OR the parts together and
C store four result limbs.  r9, r10, r11 hold the right-shifted parts used
C by the following limb within the group; r12 becomes the next r0.
| 121 | L(loopU): |
| 122 | srw r9, r28, r6 |
| 123 | slw r24, r28, r8 |
| 124 | lwz r28, 4(r4) |
| 125 | srw r10, r29, r6 |
| 126 | slw r25, r29, r8 |
| 127 | lwz r29, 8(r4) |
| 128 | srw r11, r30, r6 |
| 129 | slw r26, r30, r8 |
| 130 | lwz r30, 12(r4) |
| 131 | srw r12, r31, r6 |
| 132 | slw r27, r31, r8 |
| 133 | lwzu r31, 16(r4) |
| 134 | or r24, r0, r24 |
| 135 | stw r24, 4(r7) |
| 136 | or r25, r9, r25 |
| 137 | stw r25, 8(r7) |
| 138 | or r26, r10, r26 |
| 139 | stw r26, 12(r7) |
| 140 | or r27, r11, r27 |
| 141 | stwu r27, 16(r7) |
| 142 | mr r0, r12 |
| 143 | bdnz L(loopU) |
| 144 | |
C Final group: same computation on the limbs already in r28-r31, with no
C further loads.  r7 is not updated here, hence the offset 20 used below.
| 145 | srw r9, r28, r6 |
| 146 | slw r24, r28, r8 |
| 147 | srw r10, r29, r6 |
| 148 | slw r25, r29, r8 |
| 149 | srw r11, r30, r6 |
| 150 | slw r26, r30, r8 |
| 151 | srw r12, r31, r6 |
| 152 | slw r27, r31, r8 |
| 153 | or r24, r0, r24 |
| 154 | stw r24, 4(r7) |
| 155 | or r25, r9, r25 |
| 156 | stw r25, 8(r7) |
| 157 | or r26, r10, r26 |
| 158 | stw r26, 12(r7) |
| 159 | or r27, r11, r27 |
| 160 | stw r27, 16(r7) |
| 161 | |
C Most significant result limb is the last source limb >> cnt, held in r12.
C Then reload the saved registers, release the stack frame and return.
| 162 | stw r12, 20(r7) |
| 163 | lmw r24, 8(r1) C restore registers |
| 164 | addi r1, r1, 48 |
| 165 | blr |
| 166 | EPILOGUE() |