dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		cycles/limb
C Itanium:	1
C Itanium 2:	0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34

C mpn_copyi -- copy the n limbs at sp to rp, lowest address first.
C
C Strategy
C   The copy is unrolled four ways.  Two load pointers (r33, r21) and two
C   store pointers (r32, r20) are used.  Each pointer advances 16 bytes per
C   access, so each load/store pair handles every other limb.  Limbs that
C   have been loaded but not yet stored are held in r16-r19.
C
C   n mod 4 selects one of four feed-in sequences (.Lb00, .Lb01, .Lb10,
C   .Lb11).  Each feed-in preloads up to four limbs and then either
C     - branches to a tail store label (.Ls00, .Ls01, .Ls10, .Ls11) when
C       n <= 3 (predicate p14), or
C     - enters the loop at the matching point (.Li00, .Li01, .Li10, .Li11)
C       with ar.lc = (n - 4) >> 2.
C   The stores after the loop (.Lend onwards) write out whatever is still
C   held in r16-r19.
C
C Register use
C   r2          saved ar.lc, restored before returning
C   r14         n mod 4
C   r15         loop count written to ar.lc
C   r16-r19     limbs in flight
C   r20, r32    store pointers
C   r21, r33    load pointers
C   r34         n on entry, then n - 4
C   p14         set when n <= 3 (p15 is its complement and is not used)
C   p8,p10,p12  set when n mod 4 is 1, 2, 3 respectively

ASM_START()
PROLOGUE(mpn_copyi)
	.prologue
	.save	ar.lc, r2
	.body
C With the 32-bit ABI the pointers are widened with addp4 and n is sign
C extended before use.
ifdef(`HAVE_ABI_32',
`	addp4		r32 = 0, r32
	addp4		r33 = 0, r33
	sxt4		r34 = r34
	;;
')
{.mmi
	nop		0
	nop		0
	mov.i		r2 = ar.lc		C keep ar.lc so it can be restored at .Ls00
}
{.mmi
	and		r14 = 3, r34		C r14 = n mod 4
	cmp.ge		p14, p15 = 3, r34	C p14 = (n <= 3)
	add		r34 = -4, r34		C r34 = n - 4, basis for the loop count
	;;
}
{.mmi
	cmp.eq		p8, p0 = 1, r14
	cmp.eq		p10, p0 = 2, r14
	cmp.eq		p12, p0 = 3, r14
}
{.bbb
  (p8)	br.dptk		.Lb01
  (p10)	br.dptk		.Lb10
  (p12)	br.dptk		.Lb11
}

C Feed-in for n mod 4 = 0.  r33/r32 start at limb 0, r21/r20 at limb 1.
C Nothing has been loaded when the p14 branch is taken, so .Ls00 stores
C nothing.
.Lb00:	C  n = 0, 4, 8, 12, ...
  (p14)	br.dptk		.Ls00
	;;
	add		r21 = 8, r33
	ld8		r16 = [r33], 16
	shr		r15 = r34, 2
	;;
	ld8		r17 = [r21], 16
	mov.i		ar.lc = r15
	ld8		r18 = [r33], 16
	add		r20 = 8, r32
	;;
	ld8		r19 = [r21], 16
	br.cloop.dptk	.Loop
	;;
	br.sptk		.Lend			C loop count was zero: just store r16-r19
	;;

C Feed-in for n mod 4 = 1.  r21/r20 start at limb 0, r33/r32 at limb 1.
C The first limb goes to r19 so that it is the one stored at .Li01/.Ls01.
.Lb01:	C  n = 1, 5, 9, 13, ...
	add		r21 = 0, r33
	add		r20 = 0, r32
	add		r33 = 8, r33
	add		r32 = 8, r32
	;;
	ld8		r19 = [r21], 16
	shr		r15 = r34, 2
  (p14)	br.dptk		.Ls01
	;;
	ld8		r16 = [r33], 16
	mov.i		ar.lc = r15
	;;
	ld8		r17 = [r21], 16
	ld8		r18 = [r33], 16
	br.sptk		.Li01
	;;

C Feed-in for n mod 4 = 2.  r33/r32 start at limb 0, r21/r20 at limb 1.
C The first two limbs go to r18 and r19, matching .Li10/.Ls10.
.Lb10:	C  n = 2,6, 10, 14, ...
	add		r21 = 8, r33
	add		r20 = 8, r32
	ld8		r18 = [r33], 16
	shr		r15 = r34, 2
	;;
	ld8		r19 = [r21], 16
	mov.i		ar.lc = r15
  (p14)	br.dptk		.Ls10
	;;
	ld8		r16 = [r33], 16
	ld8		r17 = [r21], 16
	br.sptk		.Li10
	;;

C Feed-in for n mod 4 = 3.  r21/r20 start at limb 0, r33/r32 at limb 1.
C The first three limbs go to r17, r18 and r19, matching .Li11/.Ls11.
.Lb11:	C  n = 3, 7, 11, 15, ...
	add		r21 = 0, r33
	add		r20 = 0, r32
	add		r33 = 8, r33
	add		r32 = 8, r32
	;;
	ld8		r17 = [r21], 16
	shr		r15 = r34, 2
	;;
	ld8		r18 = [r33], 16
	mov.i		ar.lc = r15
	ld8		r19 = [r21], 16
  (p14)	br.dptk		.Ls11
	;;
	ld8		r16 = [r33], 16
	br.sptk		.Li11
	;;

C Main loop.  Each quarter stores a limb loaded earlier and reloads the same
C register with a later limb.  A full pass moves four limbs.
	ALIGN(32)
.Loop:
.Li00:
{.mmb
	st8		[r32] = r16, 16
	ld8		r16 = [r33], 16
	nop.b		0
}
.Li11:
{.mmb
	st8		[r20] = r17, 16
	ld8		r17 = [r21], 16
	nop.b		0
	;;
}
.Li10:
{.mmb
	st8		[r32] = r18, 16
	ld8		r18 = [r33], 16
	nop.b		0
}
.Li01:
{.mmb
	st8		[r20] = r19, 16
	ld8		r19 = [r21], 16
	br.cloop.dptk	.Loop
	;;
}
C Tail.  Store the limbs still held in registers.  The feed-in code for
C n <= 3 enters at the label that matches how many limbs it has loaded.
.Lend:	st8		[r32] = r16, 16
.Ls11:	st8		[r20] = r17, 16
	;;
.Ls10:	st8		[r32] = r18, 16
.Ls01:	st8		[r20] = r19, 16
.Ls00:	mov.i		ar.lc = r2		C restore the ar.lc value saved on entry
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()