blob: d530a1723e977ee360a46dd48e497d14753044d0 [file] [log] [blame]
Austin Schuh8c267c72023-11-18 14:05:14 -08001#ifndef FRC971_ORIN_POINTS_H_
2#define FRC971_ORIN_POINTS_H_
3
4#include <stdint.h>
5
6#include <cub/iterator/transform_input_iterator.cuh>
7#include <cuda/std/tuple>
8#include <iomanip>
9#include <ostream>
10
11#include "cuda_runtime.h"
12#include "device_launch_parameters.h"
13
14namespace frc971 {
15namespace apriltag {
16
17// Class to hold the 2 adjacent blob IDs, a point in decimated image space, the
18// half pixel offset, and the gradient.
19//
20// rep0 and rep1 are the two blob ids, and are each allocated 20 bits.
21// point is the base point and is allocated 10 bits for x and 10 bits for y.
22// dx and dy are allocated 2 bits, and can only take on set values.
23// black_to_white captures the direction of the gradient in 1 bit.
24//
25// This adds up to 63 bits so we can load this with one big load.
26struct QuadBoundaryPoint {
27 static constexpr size_t kRepEndBit = 24;
28 static constexpr size_t kBitsInKey = 64;
29
30 __forceinline__ __host__ __device__ QuadBoundaryPoint() : key(0) {}
31
32 // Sets rep0, the 0th blob id. This only respects the bottom 20 bits.
33 __forceinline__ __host__ __device__ void set_rep0(uint32_t rep0) {
34 key = (key & 0xfffff00000ffffffull) |
35 (static_cast<uint64_t>(rep0 & 0xfffff) << 24);
36 }
37 // Returns rep0.
38 __forceinline__ __host__ __device__ uint32_t rep0() const {
39 return ((key >> 24) & 0xfffff);
40 }
41
42 // Sets rep1, the 1st blob id. This only respects the bottom 20 bits.
43 __forceinline__ __host__ __device__ void set_rep1(uint32_t rep1) {
44 key = (key & 0xfffffffffffull) |
45 (static_cast<uint64_t>(rep1 & 0xfffff) << 44);
46 }
47 // Returns rep1.
48 __forceinline__ __host__ __device__ uint32_t rep1() const {
49 return ((key >> 44) & 0xfffff);
50 }
51
52 // Returns both rep0 and rep1 concatenated into a single 40 bit number.
53 __forceinline__ __host__ __device__ uint64_t rep01() const {
54 return ((key >> 24) & 0xffffffffff);
55 }
56
57 // Returns all the bits used to hold position and gradient information.
58 __forceinline__ __host__ __device__ uint32_t point_bits() const {
59 return key & 0xffffff;
60 }
61
62 // Sets the 10 bit x and y.
63 __forceinline__ __host__ __device__ void set_base_xy(uint32_t x, uint32_t y) {
64 key = (key & 0xffffffffff00000full) |
65 (static_cast<uint64_t>(x & 0x3ff) << 14) |
66 (static_cast<uint64_t>(y & 0x3ff) << 4);
67 }
68
69 // Returns the base 10 bit x and y.
70 __forceinline__ __host__ __device__ uint32_t base_x() const {
71 return ((key >> 14) & 0x3ff);
72 }
73 __forceinline__ __host__ __device__ uint32_t base_y() const {
74 return ((key >> 4) & 0x3ff);
75 }
76
77 // Sets dxy, the integer representing which of the 4 search directions we
78 // went.
79 __forceinline__ __host__ __device__ void set_dxy(uint64_t dxy) {
80 key = (key & 0xfffffffffffffffcull) | (static_cast<uint64_t>(dxy & 0x3));
81 }
82
83 // Returns the change in x derived from the search direction.
84 __forceinline__ __host__ __device__ int32_t dx() const {
85 switch (key & 0x3) {
86 case 0:
87 return 1;
88 case 1:
89 return 1;
90 case 2:
91 return 0;
92 case 3:
93 return -1;
94 }
95 return 0;
96 }
97
98 // Returns the change in y derived from the search direction.
99 __forceinline__ __host__ __device__ int32_t dy() const {
100 switch (key & 0x3) {
101 case 0:
102 return 0;
103 case 1:
104 case 2:
105 case 3:
106 return 1;
107 }
108 return 0;
109 }
110
111 // Returns the un-decimated x and y positions.
112 __forceinline__ __host__ __device__ uint32_t x() const {
113 return static_cast<int32_t>(base_x() * 2) + dx();
114 }
115 __forceinline__ __host__ __device__ uint32_t y() const {
116 return static_cast<int32_t>(base_y() * 2) + dy();
117 }
118
119 // Returns the gradient that this point represents, taking into account which
120 // direction the color transitioned.
121 __forceinline__ __host__ __device__ int8_t gx() const {
122 return black_to_white() ? dx() : -dx();
123 }
124 __forceinline__ __host__ __device__ int8_t gy() const {
125 return black_to_white() ? dy() : -dy();
126 }
127
128 // Returns the black to white or white to black bit.
129 __forceinline__ __host__ __device__ void set_black_to_white(
130 bool black_to_white) {
131 key = (key & 0xfffffffffffffff7ull) |
132 (static_cast<uint64_t>(black_to_white) << 3);
133 }
134 __forceinline__ __host__ __device__ bool black_to_white() const {
135 return (key & 0x8) != 0;
136 }
137
138 // Various operators to make it easy to compare points.
139 __forceinline__ __host__ __device__ bool operator!=(
140 const QuadBoundaryPoint other) const {
141 return other.key != key;
142 }
143 __forceinline__ __host__ __device__ bool operator==(
144 const QuadBoundaryPoint other) const {
145 return other.key == key;
146 }
147 __forceinline__ __host__ __device__ bool operator<(
148 const QuadBoundaryPoint other) const {
149 return key < other.key;
150 }
151
152 // Returns true if this point has been set. Zero is reserved for "invalid"
153 __forceinline__ __host__ __device__ bool nonzero() const {
154 return key != 0ull;
155 }
156
157 // Returns true if this point is about the other point.
158 bool near(QuadBoundaryPoint other) const { return other == *this; }
159
160 // The key. This shouldn't be parsed directly.
161 uint64_t key;
162};
163
164std::ostream &operator<<(std::ostream &os, const QuadBoundaryPoint &point);
165
166// Holds a compacted blob index, the angle to the X axis from the center of the
167// blob, and the coordinate of the point.
168//
169// The blob index is 12 bits, the angle is 28 bits, and the point is 24 bits.
170struct IndexPoint {
171 // Max number of blob IDs we can hold.
172 static constexpr size_t kMaxBlobs = 2048;
173
174 static constexpr size_t kRepEndBit = 24;
175 static constexpr size_t kBitsInKey = 64;
176
177 __forceinline__ __host__ __device__ IndexPoint() : key(0) {}
178
179 // Constructor to build a point with just the blob index, and point bits. The
180 // point bits should be grabbed from a QuadBoundaryPoint rather than built up
181 // by hand.
182 __forceinline__ __host__ __device__ IndexPoint(uint32_t blob_index,
183 uint32_t point_bits)
184 : key((static_cast<uint64_t>(blob_index & 0xfff) << 52) |
185 (static_cast<uint64_t>(point_bits & 0xffffff))) {}
186
187 // Sets and gets the 12 bit blob index.
188 __forceinline__ __host__ __device__ void set_blob_index(uint32_t blob_index) {
189 key = (key & 0x000fffffffffffffull) |
190 (static_cast<uint64_t>(blob_index & 0xfff) << 52);
191 }
192 __forceinline__ __host__ __device__ uint32_t blob_index() const {
193 return ((key >> 52) & 0xfff);
194 }
195
196 // Sets and gets the 28 bit angle.
197 __forceinline__ __host__ __device__ void set_theta(uint32_t theta) {
198 key = (key & 0xfff0000000ffffffull) |
199 (static_cast<uint64_t>(theta & 0xfffffff) << 24);
200 }
201 __forceinline__ __host__ __device__ uint32_t theta() const {
202 return ((key >> 24) & 0xfffffff);
203 }
204
205 // See QuadBoundaryPoint for a description of the rest of these.
Justin Turcotteda332b82023-12-28 22:30:10 -0800206 // Sets the 10 bit x and y.
207 __forceinline__ __host__ __device__ void set_base_xy(uint32_t x, uint32_t y) {
208 key = (key & 0xffffffffff00000full) |
209 (static_cast<uint64_t>(x & 0x3ff) << 14) |
210 (static_cast<uint64_t>(y & 0x3ff) << 4);
211 }
212
Austin Schuh8c267c72023-11-18 14:05:14 -0800213 __forceinline__ __host__ __device__ uint32_t base_x() const {
214 return ((key >> 14) & 0x3ff);
215 }
Justin Turcotteda332b82023-12-28 22:30:10 -0800216
Austin Schuh8c267c72023-11-18 14:05:14 -0800217 __forceinline__ __host__ __device__ uint32_t base_y() const {
218 return ((key >> 4) & 0x3ff);
219 }
220
221 __forceinline__ __host__ __device__ void set_dxy(uint64_t dxy) {
222 key = (key & 0xfffffffffffffffcull) | (static_cast<uint64_t>(dxy & 0x3));
223 }
224
225 __forceinline__ __host__ __device__ int32_t dx() const {
226 switch (key & 0x3) {
227 case 0:
228 return 1;
229 case 1:
230 return 1;
231 case 2:
232 return 0;
233 case 3:
234 return -1;
235 }
236 return 0;
237 }
238
239 __forceinline__ __host__ __device__ int32_t dy() const {
240 switch (key & 0x3) {
241 case 0:
242 return 0;
243 case 1:
244 case 2:
245 case 3:
246 return 1;
247 }
248 return 0;
249 }
250
251 __forceinline__ __host__ __device__ uint32_t x() const {
252 return static_cast<int32_t>(base_x() * 2) + dx();
253 }
254 __forceinline__ __host__ __device__ uint32_t y() const {
255 return static_cast<int32_t>(base_y() * 2) + dy();
256 }
257
258 __forceinline__ __host__ __device__ int8_t gx() const {
259 return black_to_white() ? dx() : -dx();
260 }
261 __forceinline__ __host__ __device__ int8_t gy() const {
262 return black_to_white() ? dy() : -dy();
263 }
264
265 __forceinline__ __host__ __device__ uint32_t point_bits() const {
266 return key & 0xffffff;
267 }
268
269 __forceinline__ __host__ __device__ void set_black_to_white(
270 bool black_to_white) {
271 key = (key & 0xfffffffffffffff7ull) |
272 (static_cast<uint64_t>(black_to_white) << 3);
273 }
274 __forceinline__ __host__ __device__ bool black_to_white() const {
275 return (key & 0x8) != 0;
276 }
277
278 // The key. This shouldn't be parsed directly.
279 uint64_t key;
280};
281
282std::ostream &operator<<(std::ostream &os, const IndexPoint &point);
283
284// Decomposer for sorting which just returns the key.
285struct QuadBoundaryPointDecomposer {
286 __host__ __device__ ::cuda::std::tuple<uint64_t &> operator()(
287 QuadBoundaryPoint &key) const {
288 return {key.key};
289 }
290};
291
292// Decomposer for sorting which just returns the key.
293struct QuadIndexPointDecomposer {
294 __host__ __device__ ::cuda::std::tuple<uint64_t &> operator()(
295 IndexPoint &key) const {
296 return {key.key};
297 }
298};
299
300} // namespace apriltag
301} // namespace frc971
302
303#endif // FRC971_ORIN_POINTS_H_