| 1 | dnl PowerPC-64 mpn_com. |
| 2 | |
| 3 | dnl Copyright 2004, 2005, 2013 Free Software Foundation, Inc. |
| 4 | |
| 5 | dnl This file is part of the GNU MP Library. |
| 6 | dnl |
| 7 | dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | dnl it under the terms of either: |
| 9 | dnl |
| 10 | dnl * the GNU Lesser General Public License as published by the Free |
| 11 | dnl Software Foundation; either version 3 of the License, or (at your |
| 12 | dnl option) any later version. |
| 13 | dnl |
| 14 | dnl or |
| 15 | dnl |
| 16 | dnl * the GNU General Public License as published by the Free Software |
| 17 | dnl Foundation; either version 2 of the License, or (at your option) any |
| 18 | dnl later version. |
| 19 | dnl |
| 20 | dnl or both in parallel, as here. |
| 21 | dnl |
| 22 | dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | dnl for more details. |
| 26 | dnl |
| 27 | dnl You should have received copies of the GNU General Public License and the |
| 28 | dnl GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | dnl see https://www.gnu.org/licenses/. |
| 30 | |
| 31 | include(`../config.m4') |
| 32 | |
| 33 | C cycles/limb |
| 34 | C POWER3/PPC630 ? |
| 35 | C POWER4/PPC970 1.25 |
| 36 | C POWER5 ? |
| 37 | C POWER6 1.32 |
| 38 | C POWER7 1.13 |
| 39 | |
| 40 | C INPUT PARAMETERS |
C rp (r3): destination pointer; all std instructions below store through it.
C up (r4): source pointer; the limbs to complement are loaded through it.
C n (r5): number of 64-bit limbs to process (used to derive the loop counts).
| 41 | define(`rp', `r3') |
| 42 | define(`up', `r4') |
| 43 | define(`n', `r5') |
| 44 | |
| 45 | ASM_START() |
C mpn_com: write the bitwise complement of each of the n 64-bit limbs at up
C to the corresponding limb at rp.
C
C Strategy: n < 4 takes the one-limb-per-iteration loop at L(sml).  Otherwise
C an odd leading limb is handled at L(xx1), two limbs are preloaded, and
C control enters the 8-limb unrolled loop at the entry point picked from
C bits 1 and 2 of n.  L(end) stores the four limbs still held in r6-r9.
C NOTE(review): n = 0 is not special-cased; presumably callers guarantee
C n >= 1 -- confirm.
| 46 | PROLOGUE(mpn_com) |
| 47 | |
C With the mode32 ABI, keep only the low 32 bits of n.
| 48 | ifdef(`HAVE_ABI_mode32', |
| 49 | ` rldicl n, n, 0,32') |
| 50 | |
C Fewer than 4 limbs: use the simple loop.
| 51 | cmpdi cr0, n, 4 |
| 52 | blt L(sml) |
| 53 | |
C ctr = (n + 4) >> 3, the number of times the bdnz closing the unrolled
C loop is reached.
| 54 | addi r10, n, 4 |
| 55 | srdi r10, r10, 3 |
| 56 | mtctr r10 |
| 57 | |
C Classify n by its low three bits: cr0 <- n & 1, cr6 <- n & 2, cr7 <- n & 4
C (each compared against zero).
| 58 | andi. r0, n, 1 |
| 59 | rlwinm r11, n, 0,30,30 |
| 60 | rlwinm r12, n, 0,29,29 |
| 61 | cmpdi cr6, r11, 0 |
| 62 | cmpdi cr7, r12, 0 |
| 63 | |
C Even n: there is no leading single limb to handle.
| 64 | beq cr0, L(xx0) |
C Odd n: complement one limb and step both pointers past it.
| 65 | L(xx1): ld r6, 0(up) |
| 66 | addi up, up, 8 |
| 67 | nor r6, r6, r6 |
| 68 | std r6, 0(rp) |
| 69 | addi rp, rp, 8 |
| 70 | |
C Preload two limbs, then bias up and/or rp so that the displacements used
C at the chosen loop entry address the first limbs not yet loaded or stored.
C r4 in the loads below is the same register as up.
| 71 | L(xx0): bne cr6, L(x10) |
| 72 | L(x00): ld r6, 0(r4) |
| 73 | ld r7, 8(r4) |
| 74 | bne cr7, L(100) |
C n & 6 == 0: enter at L(lo0), whose stores use 32(rp) and 40(rp).
| 75 | L(000): addi rp, rp, -32 |
| 76 | b L(lo0) |
C n & 6 == 4: enter at L(lo4), whose loads use 48(up) and 56(up).
| 77 | L(100): addi up, up, -32 |
| 78 | b L(lo4) |
| 79 | L(x10): ld r8, 0(r4) |
| 80 | ld r9, 8(r4) |
| 81 | bne cr7, L(110) |
C n & 6 == 2: enter at L(lo2); loads use 0(up)/8(up), stores 16(rp)/24(rp).
| 82 | L(010): addi up, up, 16 |
| 83 | addi rp, rp, -16 |
| 84 | b L(lo2) |
C n & 6 == 6: enter at L(lo6); loads use 32(up)/40(up), stores 48(rp)/56(rp).
| 85 | L(110): addi up, up, -16 |
| 86 | addi rp, rp, -48 |
| 87 | b L(lo6) |
| 88 | |
C Simple loop for n < 4: ctr = n, one limb complemented per iteration.
| 89 | L(sml): mtctr n |
| 90 | L(t): ld r6, 0(up) |
| 91 | addi up, up, 8 |
| 92 | nor r6, r6, r6 |
| 93 | std r6, 0(rp) |
| 94 | addi rp, rp, 8 |
| 95 | bdnz L(t) |
| 96 | blr |
| 97 | |
C Unrolled loop, 8 limbs per full pass.  The stages interleave loads of
C upcoming limbs with the complement and store of limbs loaded earlier,
C alternating between the register pairs r6/r7 and r8/r9.  L(lo2), L(lo0),
C L(lo6) and L(lo4) are the mid-loop entry points used by the setup above.
| 98 | ALIGN(32) |
| 99 | L(top): nor r6, r6, r6 |
| 100 | nor r7, r7, r7 |
| 101 | std r6, 0(rp) |
| 102 | std r7, 8(rp) |
| 103 | L(lo2): ld r6, 0(up) |
| 104 | ld r7, 8(up) |
| 105 | nor r8, r8, r8 |
| 106 | nor r9, r9, r9 |
| 107 | std r8, 16(rp) |
| 108 | std r9, 24(rp) |
| 109 | L(lo0): ld r8, 16(up) |
| 110 | ld r9, 24(up) |
| 111 | nor r6, r6, r6 |
| 112 | nor r7, r7, r7 |
| 113 | std r6, 32(rp) |
| 114 | std r7, 40(rp) |
| 115 | L(lo6): ld r6, 32(up) |
| 116 | ld r7, 40(up) |
| 117 | nor r8, r8, r8 |
| 118 | nor r9, r9, r9 |
| 119 | std r8, 48(rp) |
| 120 | std r9, 56(rp) |
| 121 | addi rp, rp, 64 |
| 122 | L(lo4): ld r8, 48(up) |
| 123 | ld r9, 56(up) |
| 124 | addi up, up, 64 |
| 125 | bdnz L(top) |
| 126 | |
C Loop exit: complement and store the four limbs still held in r6-r9, then
C return.
| 127 | L(end): nor r6, r6, r6 |
| 128 | nor r7, r7, r7 |
| 129 | std r6, 0(rp) |
| 130 | std r7, 8(rp) |
| 131 | nor r8, r8, r8 |
| 132 | nor r9, r9, r9 |
| 133 | std r8, 16(rp) |
| 134 | std r9, 24(rp) |
| 135 | blr |
| 136 | EPILOGUE() |