Austin Schuh | bb1338c | 2024-06-15 19:31:16 -0700 | [diff] [blame] | 1 | dnl HP-PA 2.0 mpn_rshift -- Right shift. |
| 2 | |
| 3 | dnl Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc. |
| 4 | |
| 5 | dnl This file is part of the GNU MP Library. |
| 6 | dnl |
| 7 | dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | dnl it under the terms of either: |
| 9 | dnl |
| 10 | dnl * the GNU Lesser General Public License as published by the Free |
| 11 | dnl Software Foundation; either version 3 of the License, or (at your |
| 12 | dnl option) any later version. |
| 13 | dnl |
| 14 | dnl or |
| 15 | dnl |
| 16 | dnl * the GNU General Public License as published by the Free Software |
| 17 | dnl Foundation; either version 2 of the License, or (at your option) any |
| 18 | dnl later version. |
| 19 | dnl |
| 20 | dnl or both in parallel, as here. |
| 21 | dnl |
| 22 | dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | dnl for more details. |
| 26 | dnl |
| 27 | dnl You should have received copies of the GNU General Public License and the |
| 28 | dnl GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | dnl see https://www.gnu.org/licenses/. |
| 30 | |
| 31 | |
| 32 | dnl This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500. |
| 33 | |
| 34 | include(`../config.m4') |
| 35 | |
| 36 | dnl INPUT PARAMETERS |
| 37 | define(`rp',`%r26') C destination pointer: base register of every std in mpn_rshift |
| 38 | define(`up',`%r25') C source pointer: base register of every ldd in mpn_rshift |
| 39 | define(`n',`%r24') C limb count; decremented in place and used as the loop counter |
| 40 | define(`cnt',`%r23') C shift count in bits; copied to %sar by mtsar |
| 41 | |
| 42 | ifdef(`HAVE_ABI_2_0w', |
| 43 | ` .level 2.0w |
| 44 | ',` .level 2.0 |
| 45 | ') C emits .level 2.0w when HAVE_ABI_2_0w is defined and .level 2.0 otherwise |
| 46 | PROLOGUE(mpn_rshift) C mpn_rshift(rp, up, n, cnt): store the n limbs at up shifted right by cnt bits to rp and return the bits shifted out of the lowest limb |
| 47 | mtsar cnt C %sar = cnt; every shrpd below takes its shift amount from %sar |
| 48 | ldd 0(up), %r21 C r21 = up[0]; loaded before n is examined so n = 0 is not handled (callers presumably pass n >= 1) |
| 49 | addib,= -1, n, L(end) C n = n - 1; a single-limb operand goes straight to end; the next insn is the delay slot and runs either way |
| 50 | shrpd %r21, %r0, %sar, %r29 C compute carry out limb: r29 = low 64 bits of (r21:0 >> cnt) so the shifted-out bits of up[0] land at the top of r29 |
| 51 | depw,z n, 31, 3, %r28 C r28 = (size & 7) where size is n after the decrement above; this is the jump table index |
| 52 | sub %r0, n, %r22 C r22 = -n |
| 53 | depw,z %r22, 28, 3, %r22 C r22 = 8 * (-size & 7) = byte span of the loop slots skipped by entering mid-loop |
| 54 | sub up, %r22, up C offset up: bias it downward so the fixed displacements in the unrolled loop address the right limbs |
| 55 | blr %r28, %r0 C branch into jump table: selects entry r28 of the 8-byte entries that follow the delay slot; link value discarded in r0 |
| 56 | sub rp, %r22, rp C offset rp by the same amount (blr delay slot) |
| 57 | b L(0) C table entry 0 |
| 58 | nop C delay slot; even entries want the previous limb in r21 where it already is |
| 59 | b L(1) C table entry 1 |
| 60 | copy %r21, %r20 C delay slot; odd entries want the previous limb in r20 |
| 61 | b L(2) C table entry 2 |
| 62 | nop |
| 63 | b L(3) C table entry 3 |
| 64 | copy %r21, %r20 |
| 65 | b L(4) C table entry 4 |
| 66 | nop |
| 67 | b L(5) C table entry 5 |
| 68 | copy %r21, %r20 |
| 69 | b L(6) C table entry 6 |
| 70 | nop |
| 71 | b L(7) C table entry 7 |
| 72 | copy %r21, %r20 |
| 73 | |
| 74 | LDEF(loop) C 8 limbs per iteration; r20 and r21 alternate as newer and older source limb so no register copy is needed |
| 75 | LDEF(0) ldd 8(up), %r20 C r20 = next higher source limb |
| 76 | shrpd %r20, %r21, %sar, %r21 C r21 = low 64 bits of (r20:r21 >> cnt) |
| 77 | std %r21, 0(rp) C store the finished result limb |
| 78 | LDEF(7) ldd 16(up), %r21 C same step with the roles of r20 and r21 swapped; entry point when 7 limbs precede a multiple of 8 |
| 79 | shrpd %r21, %r20, %sar, %r20 C r20 = low 64 bits of (r21:r20 >> cnt) |
| 80 | std %r20, 8(rp) |
| 81 | LDEF(6) ldd 24(up), %r20 |
| 82 | shrpd %r20, %r21, %sar, %r21 |
| 83 | std %r21, 16(rp) |
| 84 | LDEF(5) ldd 32(up), %r21 |
| 85 | shrpd %r21, %r20, %sar, %r20 |
| 86 | std %r20, 24(rp) |
| 87 | LDEF(4) ldd 40(up), %r20 |
| 88 | shrpd %r20, %r21, %sar, %r21 |
| 89 | std %r21, 32(rp) |
| 90 | LDEF(3) ldd 48(up), %r21 |
| 91 | shrpd %r21, %r20, %sar, %r20 |
| 92 | std %r20, 40(rp) |
| 93 | LDEF(2) ldd 56(up), %r20 |
| 94 | shrpd %r20, %r21, %sar, %r21 |
| 95 | std %r21, 48(rp) |
| 96 | LDEF(1) ldd 64(up), %r21 C last slot leaves the newest source limb in r21 as both loop top and end expect |
| 97 | ldo 64(up), up C up += 64 bytes (8 limbs) |
| 98 | shrpd %r21, %r20, %sar, %r20 |
| 99 | std %r20, 56(rp) |
| 100 | addib,> -8, n, L(loop) C n -= 8; loop again while the result is positive |
| 101 | ldo 64(rp), rp C rp += 64 bytes; delay slot so it also runs on loop exit |
| 102 | |
| 103 | LDEF(end) C r21 holds the most significant source limb here and rp points at the last result limb |
| 104 | shrpd %r0, %r21, %sar, %r21 C r21 = r21 >> cnt with zeros shifted in from above |
| 105 | std %r21, 0(rp) C store the most significant result limb |
| 106 | bve (%r2) C return through r2; the ABI-selected insn below fills the delay slot and sets r28 from the carry-out in r29: a plain copy for 2.0w or else the upper 32 bits of r29 (presumably the 64-bit result is then returned as the r28:r29 pair; confirm against the ABI) |
| 107 | ifdef(`HAVE_ABI_2_0w', |
| 108 | ` copy %r29,%r28 |
| 109 | ',` extrd,u %r29, 31, 32, %r28 |
| 110 | ') |
| 111 | EPILOGUE(mpn_rshift) |