blob: 312d90a3a2424361d329f336b1da7b6e843878bb [file] [log] [blame]
Austin Schuh8c267c72023-11-18 14:05:14 -08001#ifndef FRC971_ORIN_POINTS_H_
2#define FRC971_ORIN_POINTS_H_
3
4#include <stdint.h>
5
6#include <cub/iterator/transform_input_iterator.cuh>
7#include <cuda/std/tuple>
8#include <iomanip>
9#include <ostream>
10
11#include "cuda_runtime.h"
12#include "device_launch_parameters.h"
13
14namespace frc971 {
15namespace apriltag {
16
17// Class to hold the 2 adjacent blob IDs, a point in decimated image space, the
18// half pixel offset, and the gradient.
19//
20// rep0 and rep1 are the two blob ids, and are each allocated 20 bits.
21// point is the base point and is allocated 10 bits for x and 10 bits for y.
22// dx and dy are allocated 2 bits, and can only take on set values.
23// black_to_white captures the direction of the gradient in 1 bit.
24//
25// This adds up to 63 bits so we can load this with one big load.
26struct QuadBoundaryPoint {
27 static constexpr size_t kRepEndBit = 24;
28 static constexpr size_t kBitsInKey = 64;
29
30 __forceinline__ __host__ __device__ QuadBoundaryPoint() : key(0) {}
31
32 // Sets rep0, the 0th blob id. This only respects the bottom 20 bits.
33 __forceinline__ __host__ __device__ void set_rep0(uint32_t rep0) {
34 key = (key & 0xfffff00000ffffffull) |
35 (static_cast<uint64_t>(rep0 & 0xfffff) << 24);
36 }
37 // Returns rep0.
38 __forceinline__ __host__ __device__ uint32_t rep0() const {
39 return ((key >> 24) & 0xfffff);
40 }
41
42 // Sets rep1, the 1st blob id. This only respects the bottom 20 bits.
43 __forceinline__ __host__ __device__ void set_rep1(uint32_t rep1) {
44 key = (key & 0xfffffffffffull) |
45 (static_cast<uint64_t>(rep1 & 0xfffff) << 44);
46 }
47 // Returns rep1.
48 __forceinline__ __host__ __device__ uint32_t rep1() const {
49 return ((key >> 44) & 0xfffff);
50 }
51
52 // Returns both rep0 and rep1 concatenated into a single 40 bit number.
53 __forceinline__ __host__ __device__ uint64_t rep01() const {
54 return ((key >> 24) & 0xffffffffff);
55 }
56
57 // Returns all the bits used to hold position and gradient information.
58 __forceinline__ __host__ __device__ uint32_t point_bits() const {
59 return key & 0xffffff;
60 }
61
62 // Sets the 10 bit x and y.
63 __forceinline__ __host__ __device__ void set_base_xy(uint32_t x, uint32_t y) {
64 key = (key & 0xffffffffff00000full) |
65 (static_cast<uint64_t>(x & 0x3ff) << 14) |
66 (static_cast<uint64_t>(y & 0x3ff) << 4);
67 }
68
69 // Returns the base 10 bit x and y.
70 __forceinline__ __host__ __device__ uint32_t base_x() const {
71 return ((key >> 14) & 0x3ff);
72 }
73 __forceinline__ __host__ __device__ uint32_t base_y() const {
74 return ((key >> 4) & 0x3ff);
75 }
76
77 // Sets dxy, the integer representing which of the 4 search directions we
78 // went.
79 __forceinline__ __host__ __device__ void set_dxy(uint64_t dxy) {
80 key = (key & 0xfffffffffffffffcull) | (static_cast<uint64_t>(dxy & 0x3));
81 }
82
83 // Returns the change in x derived from the search direction.
84 __forceinline__ __host__ __device__ int32_t dx() const {
85 switch (key & 0x3) {
86 case 0:
87 return 1;
88 case 1:
89 return 1;
90 case 2:
91 return 0;
92 case 3:
93 return -1;
94 }
95 return 0;
96 }
97
98 // Returns the change in y derived from the search direction.
99 __forceinline__ __host__ __device__ int32_t dy() const {
100 switch (key & 0x3) {
101 case 0:
102 return 0;
103 case 1:
104 case 2:
105 case 3:
106 return 1;
107 }
108 return 0;
109 }
110
111 // Returns the un-decimated x and y positions.
112 __forceinline__ __host__ __device__ uint32_t x() const {
113 return static_cast<int32_t>(base_x() * 2) + dx();
114 }
115 __forceinline__ __host__ __device__ uint32_t y() const {
116 return static_cast<int32_t>(base_y() * 2) + dy();
117 }
118
119 // Returns the gradient that this point represents, taking into account which
120 // direction the color transitioned.
121 __forceinline__ __host__ __device__ int8_t gx() const {
122 return black_to_white() ? dx() : -dx();
123 }
124 __forceinline__ __host__ __device__ int8_t gy() const {
125 return black_to_white() ? dy() : -dy();
126 }
127
128 // Returns the black to white or white to black bit.
129 __forceinline__ __host__ __device__ void set_black_to_white(
130 bool black_to_white) {
131 key = (key & 0xfffffffffffffff7ull) |
132 (static_cast<uint64_t>(black_to_white) << 3);
133 }
134 __forceinline__ __host__ __device__ bool black_to_white() const {
135 return (key & 0x8) != 0;
136 }
137
138 // Various operators to make it easy to compare points.
139 __forceinline__ __host__ __device__ bool operator!=(
140 const QuadBoundaryPoint other) const {
141 return other.key != key;
142 }
143 __forceinline__ __host__ __device__ bool operator==(
144 const QuadBoundaryPoint other) const {
145 return other.key == key;
146 }
147 __forceinline__ __host__ __device__ bool operator<(
148 const QuadBoundaryPoint other) const {
149 return key < other.key;
150 }
151
152 // Returns true if this point has been set. Zero is reserved for "invalid"
153 __forceinline__ __host__ __device__ bool nonzero() const {
154 return key != 0ull;
155 }
156
157 // Returns true if this point is about the other point.
158 bool near(QuadBoundaryPoint other) const { return other == *this; }
159
160 // The key. This shouldn't be parsed directly.
161 uint64_t key;
162};
163
// Stream insertion operator for QuadBoundaryPoint.  Only the declaration lives
// in this header; the output format is determined by the definition, which is
// not visible here.
164std::ostream &operator<<(std::ostream &os, const QuadBoundaryPoint &point);
165
166// Holds a compacted blob index, the angle to the X axis from the center of the
167// blob, and the coordinate of the point.
168//
169// The blob index is 12 bits, the angle is 28 bits, and the point is 24 bits.
170struct IndexPoint {
171 // Max number of blob IDs we can hold.
172 static constexpr size_t kMaxBlobs = 2048;
173
174 static constexpr size_t kRepEndBit = 24;
175 static constexpr size_t kBitsInKey = 64;
176
177 __forceinline__ __host__ __device__ IndexPoint() : key(0) {}
178
179 // Constructor to build a point with just the blob index, and point bits. The
180 // point bits should be grabbed from a QuadBoundaryPoint rather than built up
181 // by hand.
182 __forceinline__ __host__ __device__ IndexPoint(uint32_t blob_index,
183 uint32_t point_bits)
184 : key((static_cast<uint64_t>(blob_index & 0xfff) << 52) |
185 (static_cast<uint64_t>(point_bits & 0xffffff))) {}
186
187 // Sets and gets the 12 bit blob index.
188 __forceinline__ __host__ __device__ void set_blob_index(uint32_t blob_index) {
189 key = (key & 0x000fffffffffffffull) |
190 (static_cast<uint64_t>(blob_index & 0xfff) << 52);
191 }
192 __forceinline__ __host__ __device__ uint32_t blob_index() const {
193 return ((key >> 52) & 0xfff);
194 }
195
196 // Sets and gets the 28 bit angle.
197 __forceinline__ __host__ __device__ void set_theta(uint32_t theta) {
198 key = (key & 0xfff0000000ffffffull) |
199 (static_cast<uint64_t>(theta & 0xfffffff) << 24);
200 }
201 __forceinline__ __host__ __device__ uint32_t theta() const {
202 return ((key >> 24) & 0xfffffff);
203 }
204
205 // See QuadBoundaryPoint for a description of the rest of these.
206 __forceinline__ __host__ __device__ uint32_t base_x() const {
207 return ((key >> 14) & 0x3ff);
208 }
209 __forceinline__ __host__ __device__ uint32_t base_y() const {
210 return ((key >> 4) & 0x3ff);
211 }
212
213 __forceinline__ __host__ __device__ void set_dxy(uint64_t dxy) {
214 key = (key & 0xfffffffffffffffcull) | (static_cast<uint64_t>(dxy & 0x3));
215 }
216
217 __forceinline__ __host__ __device__ int32_t dx() const {
218 switch (key & 0x3) {
219 case 0:
220 return 1;
221 case 1:
222 return 1;
223 case 2:
224 return 0;
225 case 3:
226 return -1;
227 }
228 return 0;
229 }
230
231 __forceinline__ __host__ __device__ int32_t dy() const {
232 switch (key & 0x3) {
233 case 0:
234 return 0;
235 case 1:
236 case 2:
237 case 3:
238 return 1;
239 }
240 return 0;
241 }
242
243 __forceinline__ __host__ __device__ uint32_t x() const {
244 return static_cast<int32_t>(base_x() * 2) + dx();
245 }
246 __forceinline__ __host__ __device__ uint32_t y() const {
247 return static_cast<int32_t>(base_y() * 2) + dy();
248 }
249
250 __forceinline__ __host__ __device__ int8_t gx() const {
251 return black_to_white() ? dx() : -dx();
252 }
253 __forceinline__ __host__ __device__ int8_t gy() const {
254 return black_to_white() ? dy() : -dy();
255 }
256
257 __forceinline__ __host__ __device__ uint32_t point_bits() const {
258 return key & 0xffffff;
259 }
260
261 __forceinline__ __host__ __device__ void set_black_to_white(
262 bool black_to_white) {
263 key = (key & 0xfffffffffffffff7ull) |
264 (static_cast<uint64_t>(black_to_white) << 3);
265 }
266 __forceinline__ __host__ __device__ bool black_to_white() const {
267 return (key & 0x8) != 0;
268 }
269
270 // The key. This shouldn't be parsed directly.
271 uint64_t key;
272};
273
// Stream insertion operator for IndexPoint.  Only the declaration lives in
// this header; the output format is determined by the definition, which is
// not visible here.
274std::ostream &operator<<(std::ostream &os, const IndexPoint &point);
275
276// Decomposer for sorting which just returns the key.
struct QuadBoundaryPointDecomposer {
  // Exposes the packed 64 bit key of a QuadBoundaryPoint by reference, wrapped
  // in a one-element tuple, so that a sort can operate on it directly.
  __host__ __device__ ::cuda::std::tuple<uint64_t &> operator()(
      QuadBoundaryPoint &key) const {
    uint64_t &packed = key.key;
    return ::cuda::std::tuple<uint64_t &>(packed);
  }
};
283
284// Decomposer for sorting which just returns the key.
struct QuadIndexPointDecomposer {
  // Exposes the packed 64 bit key of an IndexPoint by reference, wrapped in a
  // one-element tuple, so that a sort can operate on it directly.
  __host__ __device__ ::cuda::std::tuple<uint64_t &> operator()(
      IndexPoint &key) const {
    uint64_t &packed = key.key;
    return ::cuda::std::tuple<uint64_t &>(packed);
  }
};
291
292} // namespace apriltag
293} // namespace frc971
294
295#endif // FRC971_ORIN_POINTS_H_