Add first half of GPU-based AprilTag detector

This detects blob boundaries, filters them, and then orders the points
in a circle in preparation for line fitting.  It takes 2 ms for a 720p
image on the Orin NX 8GB.

Future commits will do the quad fitting and merge back with the original
algorithm.

Change-Id: Idf2869b3521e50a0056a352138d864b409dab6f1
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/orin/points.cc b/frc971/orin/points.cc
new file mode 100644
index 0000000..dbf5496
--- /dev/null
+++ b/frc971/orin/points.cc
@@ -0,0 +1,49 @@
+#include "frc971/orin/points.h"
+
+#include <iomanip>
+#include <ostream>
+
+namespace frc971 {
+namespace apriltag {
+
+// Prints point.key, point.rep01() and point.point_bits() under std::hex with
+// '0' fill at widths 16, 10 and 6.  The stream's format flags and fill
+// character are restored before returning.
+std::ostream &operator<<(std::ostream &os, const QuadBoundaryPoint &point) {
+  const std::ios_base::fmtflags original_flags = os.flags();
+  // The fill character is not part of fmtflags, so it is saved separately;
+  // otherwise '0' would leak into whatever the caller prints afterwards.
+  const char original_fill = os.fill();
+
+  os << "key:" << std::hex << std::setw(16) << std::setfill('0') << point.key
+     << " rep01:" << std::setw(10) << point.rep01() << " pt:" << std::setw(6)
+     << point.point_bits();
+  os.fill(original_fill);
+  os.flags(original_flags);
+  return os;
+}
+
+static_assert(sizeof(QuadBoundaryPoint) == 8,
+              "QuadBoundaryPoint didn't pack right.");
+
+// Prints point.key, point.blob_index(), point.theta() and point.point_bits()
+// under std::hex with '0' fill at widths 16, 3, 7 and 6.  The stream's format
+// flags and fill character are restored before returning.
+std::ostream &operator<<(std::ostream &os, const IndexPoint &point) {
+  const std::ios_base::fmtflags original_flags = os.flags();
+  // The fill character is not part of fmtflags, so it is saved separately;
+  // otherwise '0' would leak into whatever the caller prints afterwards.
+  const char original_fill = os.fill();
+
+  os << "key:" << std::hex << std::setw(16) << std::setfill('0') << point.key
+     << " i:" << std::setw(3) << point.blob_index() << " t:" << std::setw(7)
+     << point.theta() << " p:" << std::setw(6) << point.point_bits();
+  os.fill(original_fill);
+  os.flags(original_flags);
+  return os;
+}
+
+static_assert(sizeof(IndexPoint) == 8, "IndexPoint didn't pack right.");
+
+}  // namespace apriltag
+}  // namespace frc971