| #ifndef FRC971_ORIN_POINTS_H_ |
| #define FRC971_ORIN_POINTS_H_ |
| |
| #include <stdint.h> |
| |
| #include <cub/iterator/transform_input_iterator.cuh> |
| #include <cuda/std/tuple> |
| #include <iomanip> |
| #include <ostream> |
| |
| #include "cuda_runtime.h" |
| #include "device_launch_parameters.h" |
| |
| namespace frc971::apriltag { |
| |
| // Class to hold the 2 adjacent blob IDs, a point in decimated image space, the |
| // half pixel offset, and the gradient. |
| // |
| // rep0 and rep1 are the two blob ids, and are each allocated 20 bits. |
| // point is the base point and is allocated 10 bits for x and 10 bits for y. |
| // dx and dy are allocated 2 bits, and can only take on set values. |
| // black_to_white captures the direction of the gradient in 1 bit. |
| // |
| // This adds up to 63 bits so we can load this with one big load. |
| struct QuadBoundaryPoint { |
| static constexpr size_t kRepEndBit = 24; |
| static constexpr size_t kBitsInKey = 64; |
| |
| __forceinline__ __host__ __device__ QuadBoundaryPoint() : key(0) {} |
| |
| // Sets rep0, the 0th blob id. This only respects the bottom 20 bits. |
| __forceinline__ __host__ __device__ void set_rep0(uint32_t rep0) { |
| key = (key & 0xfffff00000ffffffull) | |
| (static_cast<uint64_t>(rep0 & 0xfffff) << 24); |
| } |
| // Returns rep0. |
| __forceinline__ __host__ __device__ uint32_t rep0() const { |
| return ((key >> 24) & 0xfffff); |
| } |
| |
| // Sets rep1, the 1st blob id. This only respects the bottom 20 bits. |
| __forceinline__ __host__ __device__ void set_rep1(uint32_t rep1) { |
| key = (key & 0xfffffffffffull) | |
| (static_cast<uint64_t>(rep1 & 0xfffff) << 44); |
| } |
| // Returns rep1. |
| __forceinline__ __host__ __device__ uint32_t rep1() const { |
| return ((key >> 44) & 0xfffff); |
| } |
| |
| // Returns both rep0 and rep1 concatenated into a single 40 bit number. |
| __forceinline__ __host__ __device__ uint64_t rep01() const { |
| return ((key >> 24) & 0xffffffffff); |
| } |
| |
| // Returns all the bits used to hold position and gradient information. |
| __forceinline__ __host__ __device__ uint32_t point_bits() const { |
| return key & 0xffffff; |
| } |
| |
| // Sets the 10 bit x and y. |
| __forceinline__ __host__ __device__ void set_base_xy(uint32_t x, uint32_t y) { |
| key = (key & 0xffffffffff00000full) | |
| (static_cast<uint64_t>(x & 0x3ff) << 14) | |
| (static_cast<uint64_t>(y & 0x3ff) << 4); |
| } |
| |
| // Returns the base 10 bit x and y. |
| __forceinline__ __host__ __device__ uint32_t base_x() const { |
| return ((key >> 14) & 0x3ff); |
| } |
| __forceinline__ __host__ __device__ uint32_t base_y() const { |
| return ((key >> 4) & 0x3ff); |
| } |
| |
| // Sets dxy, the integer representing which of the 4 search directions we |
| // went. |
| __forceinline__ __host__ __device__ void set_dxy(uint64_t dxy) { |
| key = (key & 0xfffffffffffffffcull) | (static_cast<uint64_t>(dxy & 0x3)); |
| } |
| |
| // Returns the change in x derived from the search direction. |
| __forceinline__ __host__ __device__ int32_t dx() const { |
| switch (key & 0x3) { |
| case 0: |
| return 1; |
| case 1: |
| return 1; |
| case 2: |
| return 0; |
| case 3: |
| return -1; |
| } |
| return 0; |
| } |
| |
| // Returns the change in y derived from the search direction. |
| __forceinline__ __host__ __device__ int32_t dy() const { |
| switch (key & 0x3) { |
| case 0: |
| return 0; |
| case 1: |
| case 2: |
| case 3: |
| return 1; |
| } |
| return 0; |
| } |
| |
| // Returns the un-decimated x and y positions. |
| __forceinline__ __host__ __device__ uint32_t x() const { |
| return static_cast<int32_t>(base_x() * 2) + dx(); |
| } |
| __forceinline__ __host__ __device__ uint32_t y() const { |
| return static_cast<int32_t>(base_y() * 2) + dy(); |
| } |
| |
| // Returns the gradient that this point represents, taking into account which |
| // direction the color transitioned. |
| __forceinline__ __host__ __device__ int8_t gx() const { |
| return black_to_white() ? dx() : -dx(); |
| } |
| __forceinline__ __host__ __device__ int8_t gy() const { |
| return black_to_white() ? dy() : -dy(); |
| } |
| |
| // Returns the black to white or white to black bit. |
| __forceinline__ __host__ __device__ void set_black_to_white( |
| bool black_to_white) { |
| key = (key & 0xfffffffffffffff7ull) | |
| (static_cast<uint64_t>(black_to_white) << 3); |
| } |
| __forceinline__ __host__ __device__ bool black_to_white() const { |
| return (key & 0x8) != 0; |
| } |
| |
| // Various operators to make it easy to compare points. |
| __forceinline__ __host__ __device__ bool operator!=( |
| const QuadBoundaryPoint other) const { |
| return other.key != key; |
| } |
| __forceinline__ __host__ __device__ bool operator==( |
| const QuadBoundaryPoint other) const { |
| return other.key == key; |
| } |
| __forceinline__ __host__ __device__ bool operator<( |
| const QuadBoundaryPoint other) const { |
| return key < other.key; |
| } |
| |
| // Returns true if this point has been set. Zero is reserved for "invalid" |
| __forceinline__ __host__ __device__ bool nonzero() const { |
| return key != 0ull; |
| } |
| |
| // Returns true if this point is about the other point. |
| bool near(QuadBoundaryPoint other) const { return other == *this; } |
| |
| // The key. This shouldn't be parsed directly. |
| uint64_t key; |
| }; |
| |
| std::ostream &operator<<(std::ostream &os, const QuadBoundaryPoint &point); |
| |
| // Holds a compacted blob index, the angle to the X axis from the center of the |
| // blob, and the coordinate of the point. |
| // |
| // The blob index is 12 bits, the angle is 28 bits, and the point is 24 bits. |
| struct IndexPoint { |
| // Max number of blob IDs we can hold. |
| static constexpr size_t kMaxBlobs = 2048; |
| |
| static constexpr size_t kRepEndBit = 24; |
| static constexpr size_t kBitsInKey = 64; |
| |
| __forceinline__ __host__ __device__ IndexPoint() : key(0) {} |
| |
| // Constructor to build a point with just the blob index, and point bits. The |
| // point bits should be grabbed from a QuadBoundaryPoint rather than built up |
| // by hand. |
| __forceinline__ __host__ __device__ IndexPoint(uint32_t blob_index, |
| uint32_t point_bits) |
| : key((static_cast<uint64_t>(blob_index & 0xfff) << 52) | |
| (static_cast<uint64_t>(point_bits & 0xffffff))) {} |
| |
| // Sets and gets the 12 bit blob index. |
| __forceinline__ __host__ __device__ void set_blob_index(uint32_t blob_index) { |
| key = (key & 0x000fffffffffffffull) | |
| (static_cast<uint64_t>(blob_index & 0xfff) << 52); |
| } |
| __forceinline__ __host__ __device__ uint32_t blob_index() const { |
| return ((key >> 52) & 0xfff); |
| } |
| |
| // Sets and gets the 28 bit angle. |
| __forceinline__ __host__ __device__ void set_theta(uint32_t theta) { |
| key = (key & 0xfff0000000ffffffull) | |
| (static_cast<uint64_t>(theta & 0xfffffff) << 24); |
| } |
| __forceinline__ __host__ __device__ uint32_t theta() const { |
| return ((key >> 24) & 0xfffffff); |
| } |
| |
| // See QuadBoundaryPoint for a description of the rest of these. |
| // Sets the 10 bit x and y. |
| __forceinline__ __host__ __device__ void set_base_xy(uint32_t x, uint32_t y) { |
| key = (key & 0xffffffffff00000full) | |
| (static_cast<uint64_t>(x & 0x3ff) << 14) | |
| (static_cast<uint64_t>(y & 0x3ff) << 4); |
| } |
| |
| __forceinline__ __host__ __device__ uint32_t base_x() const { |
| return ((key >> 14) & 0x3ff); |
| } |
| |
| __forceinline__ __host__ __device__ uint32_t base_y() const { |
| return ((key >> 4) & 0x3ff); |
| } |
| |
| __forceinline__ __host__ __device__ void set_dxy(uint64_t dxy) { |
| key = (key & 0xfffffffffffffffcull) | (static_cast<uint64_t>(dxy & 0x3)); |
| } |
| |
| __forceinline__ __host__ __device__ int32_t dx() const { |
| switch (key & 0x3) { |
| case 0: |
| return 1; |
| case 1: |
| return 1; |
| case 2: |
| return 0; |
| case 3: |
| return -1; |
| } |
| return 0; |
| } |
| |
| __forceinline__ __host__ __device__ int32_t dy() const { |
| switch (key & 0x3) { |
| case 0: |
| return 0; |
| case 1: |
| case 2: |
| case 3: |
| return 1; |
| } |
| return 0; |
| } |
| |
| __forceinline__ __host__ __device__ uint32_t x() const { |
| return static_cast<int32_t>(base_x() * 2) + dx(); |
| } |
| __forceinline__ __host__ __device__ uint32_t y() const { |
| return static_cast<int32_t>(base_y() * 2) + dy(); |
| } |
| |
| __forceinline__ __host__ __device__ int8_t gx() const { |
| return black_to_white() ? dx() : -dx(); |
| } |
| __forceinline__ __host__ __device__ int8_t gy() const { |
| return black_to_white() ? dy() : -dy(); |
| } |
| |
| __forceinline__ __host__ __device__ uint32_t point_bits() const { |
| return key & 0xffffff; |
| } |
| |
| __forceinline__ __host__ __device__ void set_black_to_white( |
| bool black_to_white) { |
| key = (key & 0xfffffffffffffff7ull) | |
| (static_cast<uint64_t>(black_to_white) << 3); |
| } |
| __forceinline__ __host__ __device__ bool black_to_white() const { |
| return (key & 0x8) != 0; |
| } |
| |
| // The key. This shouldn't be parsed directly. |
| uint64_t key; |
| }; |
| |
| std::ostream &operator<<(std::ostream &os, const IndexPoint &point); |
| |
| // Decomposer for sorting which just returns the key. |
| struct QuadBoundaryPointDecomposer { |
| __host__ __device__ ::cuda::std::tuple<uint64_t &> operator()( |
| QuadBoundaryPoint &key) const { |
| return {key.key}; |
| } |
| }; |
| |
| // Decomposer for sorting which just returns the key. |
| struct QuadIndexPointDecomposer { |
| __host__ __device__ ::cuda::std::tuple<uint64_t &> operator()( |
| IndexPoint &key) const { |
| return {key.key}; |
| } |
| }; |
| |
| } // namespace frc971::apriltag |
| |
| #endif // FRC971_ORIN_POINTS_H_ |