Speed up cameras to 25fps by parallelizing solves

Running solves in parallel is much more efficient than parallelizing
each one. This lets us comfortably process images at 25fps.

Change-Id: I572ae60cf70b183d2a7716fe53c80475f862f24d
diff --git a/y2019/vision/target_sender.cc b/y2019/vision/target_sender.cc
index 72e7939..537e209 100644
--- a/y2019/vision/target_sender.cc
+++ b/y2019/vision/target_sender.cc
@@ -1,7 +1,10 @@
 #include "y2019/vision/target_finder.h"
 
+#include <condition_variable>
 #include <fstream>
+#include <mutex>
 #include <random>
+#include <thread>
 
 #include "aos/vision/blob/codec.h"
 #include "aos/vision/blob/find_blob.h"
@@ -67,6 +70,96 @@
 using aos::vision::ImageRange;
 using aos::vision::RangeImage;
 using aos::vision::ImageFormat;
+using y2019::vision::TargetFinder;
+using y2019::vision::IntermediateResult;
+using y2019::vision::Target;
+
+class TargetProcessPool {
+ public:
+  // The number of threads we'll use.
+  static constexpr int kThreads = 4;
+
+  TargetProcessPool(TargetFinder *finder);
+  ~TargetProcessPool();
+
+  std::vector<IntermediateResult> Process(std::vector<const Target *> &&inputs,
+                                          bool verbose);
+
+ private:
+  // The main function for a thread.
+  void RunThread();
+
+  std::array<std::thread, kThreads> threads_;
+  // Coordinates access to results_/inputs_ and coordinates with
+  // condition_variable_.
+  std::mutex mutex_;
+  // Signals changes to results_/inputs_ and quit_.
+  std::condition_variable condition_variable_;
+  bool quit_ = false;
+
+  bool verbose_ = false;
+  std::vector<const Target *> inputs_;
+  std::vector<IntermediateResult> results_;
+
+  TargetFinder *const finder_;
+};
+
+TargetProcessPool::TargetProcessPool(TargetFinder *finder) : finder_(finder) {
+  for (int i = 0; i < kThreads; ++i) {
+    threads_[i] = std::thread([this]() { RunThread(); });
+  }
+}
+
+TargetProcessPool::~TargetProcessPool() {
+  {
+    std::unique_lock<std::mutex> locker(mutex_);
+    quit_ = true;
+    condition_variable_.notify_all();
+  }
+  for (int i = 0; i < kThreads; ++i) {
+    threads_[i].join();
+  }
+}
+
+std::vector<IntermediateResult> TargetProcessPool::Process(
+    std::vector<const Target *> &&inputs, bool verbose) {
+  inputs_ = std::move(inputs);
+  results_.clear();
+  verbose_ = verbose;
+  const size_t number_targets = inputs_.size();
+  {
+    std::unique_lock<std::mutex> locker(mutex_);
+    condition_variable_.notify_all();
+    while (results_.size() < number_targets) {
+      condition_variable_.wait(locker);
+    }
+  }
+  return std::move(results_);
+}
+
+void TargetProcessPool::RunThread() {
+  while (true) {
+    const Target *my_input;
+    {
+      std::unique_lock<std::mutex> locker(mutex_);
+      while (inputs_.empty()) {
+        if (quit_) {
+          return;
+        }
+        condition_variable_.wait(locker);
+      }
+      my_input = inputs_.back();
+      inputs_.pop_back();
+    }
+    IntermediateResult my_output =
+        finder_->ProcessTargetToResult(*my_input, false);
+    {
+      std::unique_lock<std::mutex> locker(mutex_);
+      results_.emplace_back(std::move(my_output));
+      condition_variable_.notify_all();
+    }
+  }
+}
 
 int main(int argc, char **argv) {
   (void)argc;
@@ -83,7 +176,8 @@
   // dup2(itsDev, 1);
   // dup2(itsDev, 2);
 
-  TargetFinder finder_;
+  TargetFinder finder;
+  TargetProcessPool process_pool(&finder);
   ImageWriter writer;
   uint32_t image_count = 0;
   bool log_images = false;
@@ -92,7 +186,7 @@
   params0.set_exposure(50);
   params0.set_brightness(40);
   params0.set_width(640);
-  params0.set_fps(15);
+  params0.set_fps(25);
   params0.set_height(480);
 
   aos::vision::FastYuyvYPooledThresholder thresholder;
@@ -108,18 +202,18 @@
     aos::vision::ImageFormat fmt{640, 480};
     aos::vision::BlobList imgs =
         aos::vision::FindBlobs(thresholder.Threshold(fmt, data.data(), 120));
-    finder_.PreFilter(&imgs);
+    finder.PreFilter(&imgs);
     LOG(INFO) << "Blobs: " << imgs.size();
 
     constexpr bool verbose = false;
     ::std::vector<Polygon> raw_polys;
     for (const RangeImage &blob : imgs) {
       // Convert blobs to contours in the corrected space.
-      ContourNode *contour = finder_.GetContour(blob);
+      ContourNode *contour = finder.GetContour(blob);
       ::std::vector<::Eigen::Vector2f> unwarped_contour =
-          finder_.UnWarpContour(contour);
+          finder.UnWarpContour(contour);
       const Polygon polygon =
-          finder_.FindPolygon(::std::move(unwarped_contour), verbose);
+          finder.FindPolygon(::std::move(unwarped_contour), verbose);
       if (!polygon.segments.empty()) {
         raw_polys.push_back(polygon);
       }
@@ -128,12 +222,12 @@
 
     // Calculate each component side of a possible target.
     ::std::vector<TargetComponent> target_component_list =
-        finder_.FillTargetComponentList(raw_polys, verbose);
+        finder.FillTargetComponentList(raw_polys, verbose);
     LOG(INFO) << "Components: " << target_component_list.size();
 
     // Put the compenents together into targets.
     ::std::vector<Target> target_list =
-        finder_.FindTargetsFromComponents(target_component_list, verbose);
+        finder.FindTargetsFromComponents(target_component_list, verbose);
     static constexpr size_t kMaximumPotentialTargets = 8;
     LOG(INFO) << "Potential Targets (will filter to "
               << kMaximumPotentialTargets << "): " << target_list.size();
@@ -157,14 +251,15 @@
     }
 
     // Use the solver to generate an intermediate version of our results.
-    ::std::vector<IntermediateResult> results;
+    std::vector<const Target *> inputs;
     for (size_t index : target_list_indices) {
-      const Target &target = target_list[index];
-      results.emplace_back(finder_.ProcessTargetToResult(target, verbose));
+      inputs.push_back(&target_list[index]);
     }
+    std::vector<IntermediateResult> results =
+        process_pool.Process(std::move(inputs), verbose);
     LOG(INFO) << "Raw Results: " << results.size();
 
-    results = finder_.FilterResults(results, 30, verbose);
+    results = finder.FilterResults(results, 30, verbose);
     LOG(INFO) << "Results: " << results.size();
 
     // TODO: Select top 3 (randomly?)
@@ -223,7 +318,7 @@
               frc971::jevois::UartUnpackToCamera(packet);
           if (calibration_question) {
             const auto &calibration = *calibration_question;
-            IntrinsicParams *intrinsics = finder_.mutable_intrinsics();
+            IntrinsicParams *intrinsics = finder.mutable_intrinsics();
             intrinsics->mount_angle = calibration.calibration(0, 0);
             intrinsics->focal_length = calibration.calibration(0, 1);
             intrinsics->barrel_mount = calibration.calibration(0, 2);