Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | dnl IA-64 mpn_copyi -- copy limb vector, incrementing. |
| 2 | |
| 3 | dnl Contributed to the GNU project by Torbjorn Granlund. |
| 4 | |
| 5 | dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc. |
| 6 | |
| 7 | dnl This file is part of the GNU MP Library. |
| 8 | dnl |
| 9 | dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| 10 | dnl it under the terms of either: |
| 11 | dnl |
| 12 | dnl * the GNU Lesser General Public License as published by the Free |
| 13 | dnl Software Foundation; either version 3 of the License, or (at your |
| 14 | dnl option) any later version. |
| 15 | dnl |
| 16 | dnl or |
| 17 | dnl |
| 18 | dnl * the GNU General Public License as published by the Free Software |
| 19 | dnl Foundation; either version 2 of the License, or (at your option) any |
| 20 | dnl later version. |
| 21 | dnl |
| 22 | dnl or both in parallel, as here. |
| 23 | dnl |
| 24 | dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| 25 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 26 | dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 27 | dnl for more details. |
| 28 | dnl |
| 29 | dnl You should have received copies of the GNU General Public License and the |
| 30 | dnl GNU Lesser General Public License along with the GNU MP Library. If not, |
| 31 | dnl see https://www.gnu.org/licenses/. |
| 32 | |
| 33 | include(`../config.m4') |
| 34 | |
| 35 | C cycles/limb |
| 36 | C Itanium: 1 |
| 37 | C Itanium 2: 0.5 |
| 38 | |
| 39 | C INPUT PARAMETERS |
| 40 | C rp = r32 |
| 41 | C sp = r33 |
| 42 | C n = r34 |
| 43 | |
| 44 | ASM_START() |
| 45 | PROLOGUE(mpn_copyi) C copy n limbs from sp to rp, ascending addresses |
| 46 | .prologue |
| 47 | .save ar.lc, r2 C unwind annotation: ar.lc is kept in r2 |
| 48 | .body |
| 49 | ifdef(`HAVE_ABI_32', |
| 50 | ` addp4 r32 = 0, r32 C 32-bit ABI: form a 64-bit address from rp |
| 51 | addp4 r33 = 0, r33 C 32-bit ABI: form a 64-bit address from sp |
| 52 | sxt4 r34 = r34 C 32-bit ABI: sign-extend n to 64 bits |
| 53 | ;; |
| 54 | ') |
| 55 | {.mmi |
| 56 | nop 0 |
| 57 | nop 0 |
| 58 | mov.i r2 = ar.lc C save ar.lc, it is restored at .Ls00 |
| 59 | } |
| 60 | {.mmi |
| 61 | and r14 = 3, r34 C r14 = n & 3, selects one of four entry paths |
| 62 | cmp.ge p14, p15 = 3, r34 C p14 = (n <= 3), p15 = (n > 3) |
| 63 | add r34 = -4, r34 C r34 = n - 4, later shifted to give the loop count |
| 64 | ;; |
| 65 | } |
| 66 | {.mmi |
| 67 | cmp.eq p8, p0 = 1, r14 C p8 = (n & 3 == 1) |
| 68 | cmp.eq p10, p0 = 2, r14 C p10 = (n & 3 == 2) |
| 69 | cmp.eq p12, p0 = 3, r14 C p12 = (n & 3 == 3) |
| 70 | } |
| 71 | {.bbb |
| 72 | (p8) br.dptk .Lb01 |
| 73 | (p10) br.dptk .Lb10 |
| 74 | (p12) br.dptk .Lb11 |
| 75 | } |
| 76 | |
| 77 | .Lb00: C n = 0, 4, 8, 12, ... |
| 78 | (p14) br.dptk .Ls00 C p14 set: nothing is stored, restore ar.lc and return |
| 79 | ;; |
| 80 | add r21 = 8, r33 C r21 = second load pointer, 8 bytes above r33 |
| 81 | ld8 r16 = [r33], 16 C load limb 0, r33 += 16 |
| 82 | shr r15 = r34, 2 C r15 = (n - 4) >> 2, the value written to ar.lc |
| 83 | ;; |
| 84 | ld8 r17 = [r21], 16 C load limb 1, r21 += 16 |
| 85 | mov.i ar.lc = r15 |
| 86 | ld8 r18 = [r33], 16 C load limb 2 |
| 87 | add r20 = 8, r32 C r20 = second store pointer, 8 bytes above r32 |
| 88 | ;; |
| 89 | ld8 r19 = [r21], 16 C load limb 3 |
| 90 | br.cloop.dptk .Loop C ar.lc != 0: decrement it and enter the loop |
| 91 | ;; |
| 92 | br.sptk .Lend C ar.lc == 0: only the four loaded limbs remain |
| 93 | ;; |
| 94 | |
| 95 | .Lb01: C n = 1, 5, 9, 13, ... |
| 96 | add r21 = 0, r33 C r21 = sp, load pointer starting at limb 0 |
| 97 | add r20 = 0, r32 C r20 = rp, store pointer starting at limb 0 |
| 98 | add r33 = 8, r33 C r33 = sp + 8, load pointer starting at limb 1 |
| 99 | add r32 = 8, r32 C r32 = rp + 8, store pointer starting at limb 1 |
| 100 | ;; |
| 101 | ld8 r19 = [r21], 16 C load limb 0 |
| 102 | shr r15 = r34, 2 C r15 = (n - 4) >> 2 |
| 103 | (p14) br.dptk .Ls01 C p14 set: skip the loop, .Ls01 stores r19 only |
| 104 | ;; |
| 105 | ld8 r16 = [r33], 16 C load limb 1 |
| 106 | mov.i ar.lc = r15 |
| 107 | ;; |
| 108 | ld8 r17 = [r21], 16 C load limb 2 |
| 109 | ld8 r18 = [r33], 16 C load limb 3 |
| 110 | br.sptk .Li01 C join the loop at its last bundle |
| 111 | ;; |
| 112 | |
| 113 | .Lb10: C n = 2, 6, 10, 14, ... |
| 114 | add r21 = 8, r33 C r21 = sp + 8, load pointer starting at limb 1 |
| 115 | add r20 = 8, r32 C r20 = rp + 8, store pointer starting at limb 1 |
| 116 | ld8 r18 = [r33], 16 C load limb 0 |
| 117 | shr r15 = r34, 2 C r15 = (n - 4) >> 2 |
| 118 | ;; |
| 119 | ld8 r19 = [r21], 16 C load limb 1 |
| 120 | mov.i ar.lc = r15 |
| 121 | (p14) br.dptk .Ls10 C p14 set: skip the loop, .Ls10 stores r18 and r19 |
| 122 | ;; |
| 123 | ld8 r16 = [r33], 16 C load limb 2 |
| 124 | ld8 r17 = [r21], 16 C load limb 3 |
| 125 | br.sptk .Li10 C join the loop at its third bundle |
| 126 | ;; |
| 127 | |
| 128 | .Lb11: C n = 3, 7, 11, 15, ... |
| 129 | add r21 = 0, r33 C r21 = sp, load pointer starting at limb 0 |
| 130 | add r20 = 0, r32 C r20 = rp, store pointer starting at limb 0 |
| 131 | add r33 = 8, r33 C r33 = sp + 8, load pointer starting at limb 1 |
| 132 | add r32 = 8, r32 C r32 = rp + 8, store pointer starting at limb 1 |
| 133 | ;; |
| 134 | ld8 r17 = [r21], 16 C load limb 0 |
| 135 | shr r15 = r34, 2 C r15 = (n - 4) >> 2 |
| 136 | ;; |
| 137 | ld8 r18 = [r33], 16 C load limb 1 |
| 138 | mov.i ar.lc = r15 |
| 139 | ld8 r19 = [r21], 16 C load limb 2 |
| 140 | (p14) br.dptk .Ls11 C p14 set: skip the loop, .Ls11 stores r17, r18, r19 |
| 141 | ;; |
| 142 | ld8 r16 = [r33], 16 C load limb 3 |
| 143 | br.sptk .Li11 C join the loop at its second bundle |
| 144 | ;; |
| 145 | |
| 146 | ALIGN(32) |
| 147 | .Loop: C each bundle stores one loaded limb and reloads the same register |
| 148 | .Li00: |
| 149 | {.mmb |
| 150 | st8 [r32] = r16, 16 C store r16, r32 += 16 |
| 151 | ld8 r16 = [r33], 16 C reload r16, r33 += 16 |
| 152 | nop.b 0 |
| 153 | } |
| 154 | .Li11: C loop entry used by .Lb11 |
| 155 | {.mmb |
| 156 | st8 [r20] = r17, 16 C store r17, r20 += 16 |
| 157 | ld8 r17 = [r21], 16 C reload r17, r21 += 16 |
| 158 | nop.b 0 |
| 159 | ;; |
| 160 | } |
| 161 | .Li10: C loop entry used by .Lb10 |
| 162 | {.mmb |
| 163 | st8 [r32] = r18, 16 |
| 164 | ld8 r18 = [r33], 16 |
| 165 | nop.b 0 |
| 166 | } |
| 167 | .Li01: C loop entry used by .Lb01 |
| 168 | {.mmb |
| 169 | st8 [r20] = r19, 16 |
| 170 | ld8 r19 = [r21], 16 |
| 171 | br.cloop.dptk .Loop C repeat while ar.lc != 0, decrementing it |
| 172 | ;; |
| 173 | } |
| 174 | .Lend: st8 [r32] = r16, 16 C tail: store the limbs still held in r16..r19 |
| 175 | .Ls11: st8 [r20] = r17, 16 C .Lb11 with p14 enters here: r17, r18, r19 pending |
| 176 | ;; |
| 177 | .Ls10: st8 [r32] = r18, 16 C .Lb10 with p14 enters here: r18, r19 pending |
| 178 | .Ls01: st8 [r20] = r19, 16 C .Lb01 with p14 enters here: r19 pending |
| 179 | .Ls00: mov.i ar.lc = r2 C restore the ar.lc value saved on entry |
| 180 | br.ret.sptk.many b0 |
| 181 | EPILOGUE() |
| 182 | ASM_END() |