Add redzones around lockless queue data

This will help catch writers corrupting memory by writing outside the
bounds of their buffers.

No significant impact on CPU load or memory usage.

Change-Id: Ica78b967d0251befd2a99b1039ddbfb145ff033d
diff --git a/aos/events/shm_event_loop_test.cc b/aos/events/shm_event_loop_test.cc
index d9a8872..2aeefb4 100644
--- a/aos/events/shm_event_loop_test.cc
+++ b/aos/events/shm_event_loop_test.cc
@@ -288,6 +288,29 @@
   EXPECT_LT(fetcher.context().data, private_memory.end());
 }
 
+// Tests that corrupting the bytes around the data buffer results in a crash.
+TEST_P(ShmEventLoopDeathTest, OutOfBoundsWrite) {
+  auto loop1 = factory()->Make("loop1");
+  std::unique_ptr<aos::RawSender> sender =
+      loop1->MakeRawSender(configuration::GetChannel(
+          loop1->configuration(), "/test", "aos.TestMessage", "", nullptr));
+  for (size_t i = 0; i < kChannelDataRedzone; ++i) {
+    SCOPED_TRACE(std::to_string(i));
+    EXPECT_DEATH(
+        {
+          ++static_cast<char *>(sender->data())[-1 - i];
+          sender->Send(0);
+        },
+        "Somebody wrote outside the buffer of their message");
+    EXPECT_DEATH(
+        {
+          ++static_cast<char *>(sender->data())[sender->size() + i];
+          sender->Send(0);
+        },
+        "Somebody wrote outside the buffer of their message");
+  }
+}
+
 // TODO(austin): Test that missing a deadline with a timer recovers as expected.
 
 INSTANTIATE_TEST_CASE_P(ShmEventLoopCopyTest, ShmEventLoopTest,
diff --git a/aos/ipc_lib/BUILD b/aos/ipc_lib/BUILD
index 6c14200..d5add53 100644
--- a/aos/ipc_lib/BUILD
+++ b/aos/ipc_lib/BUILD
@@ -165,6 +165,7 @@
         "//aos/time",
         "//aos/util:compiler_memory_barrier",
         "@com_github_google_glog//:glog",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
diff --git a/aos/ipc_lib/data_alignment.h b/aos/ipc_lib/data_alignment.h
index 2f59b78..72a7456 100644
--- a/aos/ipc_lib/data_alignment.h
+++ b/aos/ipc_lib/data_alignment.h
@@ -36,6 +36,10 @@
   return reinterpret_cast<char *>(rounded_data);
 }
 
+// The size of the redzone we maintain outside each message's data to help
+// detect out-of-bounds writes.
+static constexpr size_t kChannelDataRedzone = 32;
+
 }  // namespace aos
 
 #endif  // AOS_IPC_LIB_DATA_ALIGNMENT_H_
diff --git a/aos/ipc_lib/lockless_queue.cc b/aos/ipc_lib/lockless_queue.cc
index 4115769..6774489 100644
--- a/aos/ipc_lib/lockless_queue.cc
+++ b/aos/ipc_lib/lockless_queue.cc
@@ -435,7 +435,11 @@
 
 size_t LocklessQueueConfiguration::message_size() const {
   // Round up the message size so following data is aligned appropriately.
+  // Make sure to leave space to align the message data. It will be aligned
+  // relative to the start of the shared memory region, but that might not be
+  // aligned for some use cases.
   return LocklessQueueMemory::AlignmentRoundUp(message_data_size +
+                                               kChannelDataRedzone * 2 +
                                                (kChannelDataAlignment - 1)) +
          sizeof(Message);
 }
@@ -468,6 +472,67 @@
   return size;
 }
 
+// Calculates the starting byte for a redzone in shared memory. This starting
+// value is simply incremented for subsequent bytes.
+//
+// The result is based on the offset of the region in shared memor, to ensure it
+// is the same for each region when we generate and verify, but different for
+// each region to help catch forms of corruption like copying out-of-bounds data
+// from one place to another.
+//
+// memory is the base pointer to the shared memory. It is used to calculated
+// offsets. starting_data is the start of the redzone's data. Each one will
+// get a unique pattern.
+uint8_t RedzoneStart(const LocklessQueueMemory *memory,
+                     const char *starting_data) {
+  const auto memory_int = reinterpret_cast<uintptr_t>(memory);
+  const auto starting_int = reinterpret_cast<uintptr_t>(starting_data);
+  DCHECK(starting_int >= memory_int);
+  DCHECK(starting_int < memory_int + LocklessQueueMemorySize(memory->config));
+  const uintptr_t starting_offset = starting_int - memory_int;
+  // Just XOR the lower 2 bytes. They higher-order bytes are probably 0
+  // anyways.
+  return (starting_offset & 0xFF) ^ ((starting_offset >> 8) & 0xFF);
+}
+
+// Returns true if the given redzone has invalid data.
+bool CheckRedzone(const LocklessQueueMemory *memory,
+                  absl::Span<const char> redzone) {
+  uint8_t redzone_value = RedzoneStart(memory, redzone.data());
+
+  bool bad = false;
+
+  for (size_t i = 0; i < redzone.size(); ++i) {
+    if (memcmp(&redzone[i], &redzone_value, 1)) {
+      bad = true;
+    }
+    ++redzone_value;
+  }
+
+  return bad;
+}
+
+// Returns true if either of message's redzones has invalid data.
+bool CheckBothRedzones(const LocklessQueueMemory *memory,
+                       const Message *message) {
+  return CheckRedzone(memory,
+                      message->PreRedzone(memory->message_data_size())) ||
+         CheckRedzone(memory, message->PostRedzone(memory->message_data_size(),
+                                                   memory->message_size()));
+}
+
+// Fills the given redzone with the expected data.
+void FillRedzone(LocklessQueueMemory *memory, absl::Span<char> redzone) {
+  uint8_t redzone_value = RedzoneStart(memory, redzone.data());
+  for (size_t i = 0; i < redzone.size(); ++i) {
+    memcpy(&redzone[i], &redzone_value, 1);
+    ++redzone_value;
+  }
+
+  // Just double check that the implementations match.
+  CHECK(!CheckRedzone(memory, redzone));
+}
+
 LocklessQueueMemory *InitializeLocklessQueueMemory(
     LocklessQueueMemory *memory, LocklessQueueConfiguration config) {
   // Everything should be zero initialized already.  So we just need to fill
@@ -534,8 +599,12 @@
     CHECK_LE(num_messages, Index::MaxMessages());
 
     for (size_t i = 0; i < num_messages; ++i) {
-      memory->GetMessage(Index(QueueIndex::Zero(memory->queue_size()), i))
-          ->header.queue_index.Invalidate();
+      Message *const message =
+          memory->GetMessage(Index(QueueIndex::Zero(memory->queue_size()), i));
+      message->header.queue_index.Invalidate();
+      FillRedzone(memory, message->PreRedzone(memory->message_data_size()));
+      FillRedzone(memory, message->PostRedzone(memory->message_data_size(),
+                                               memory->message_size()));
     }
 
     for (size_t i = 0; i < memory->queue_size(); ++i) {
@@ -866,6 +935,8 @@
   // modifying it right now.
   const Index scratch_index = sender->scratch_index.RelaxedLoad();
   Message *const message = memory_->GetMessage(scratch_index);
+  CHECK(!CheckBothRedzones(memory_, message))
+      << ": Somebody wrote outside the buffer of their message";
 
   // We should have invalidated this when we first got the buffer. Verify that
   // in debug mode.
@@ -995,6 +1066,8 @@
     break;
   }
 
+  DCHECK(!CheckBothRedzones(memory_, memory_->GetMessage(to_replace)))
+      << ": Invalid message found in shared memory";
   // to_replace is our current scratch_index. It isn't in the queue, which means
   // nobody new can pin it. They can set their `pinned` to it, but they will
   // back it out, so they don't count. This means that we just need to find a
@@ -1003,6 +1076,9 @@
   // pinned then we'll look for a new one to use instead.
   const Index new_scratch =
       SwapPinnedSenderScratch(memory_, sender, to_replace);
+  DCHECK(!CheckBothRedzones(
+      memory_, memory_->GetMessage(sender->scratch_index.RelaxedLoad())))
+      << ": Invalid message found in shared memory";
 
   // If anybody is looking at this message (they shouldn't be), then try telling
   // them about it (best-effort).
@@ -1089,6 +1165,8 @@
   {
     const Index message_index = queue_slot->Load();
     Message *const message = memory_->GetMessage(message_index);
+    DCHECK(!CheckBothRedzones(memory_, message))
+        << ": Invalid message found in shared memory";
 
     const QueueIndex message_queue_index =
         message->header.queue_index.Load(queue_size);
@@ -1141,6 +1219,8 @@
   const Message *m = memory_->GetMessage(mi);
 
   while (true) {
+    DCHECK(!CheckBothRedzones(memory_, m))
+        << ": Invalid message found in shared memory";
     // We need to confirm that the data doesn't change while we are reading it.
     // Do that by first confirming that the message points to the queue index we
     // want.
@@ -1341,6 +1421,10 @@
     ::std::cout << "        size_t length = " << m->header.length
                 << ::std::endl;
     ::std::cout << "      }" << ::std::endl;
+    if (!CheckBothRedzones(memory, m)) {
+      ::std::cout << "      // *** DATA REDZONES ARE CORRUPTED ***"
+                  << ::std::endl;
+    }
     ::std::cout << "      data: {";
 
     if (FLAGS_dump_lockless_queue_data) {
diff --git a/aos/ipc_lib/lockless_queue.h b/aos/ipc_lib/lockless_queue.h
index 3cd3726..b71bd7f 100644
--- a/aos/ipc_lib/lockless_queue.h
+++ b/aos/ipc_lib/lockless_queue.h
@@ -7,6 +7,8 @@
 #include <optional>
 #include <vector>
 
+#include "absl/types/span.h"
+
 #include "aos/ipc_lib/aos_sync.h"
 #include "aos/ipc_lib/data_alignment.h"
 #include "aos/ipc_lib/index.h"
@@ -93,16 +95,64 @@
     size_t length;
   } header;
 
-  char *data(size_t message_size) { return RoundedData(message_size); }
-  const char *data(size_t message_size) const {
-    return RoundedData(message_size);
+  // Returns the start of the data buffer, given that message_data_size is
+  // the same one used to allocate this message's memory.
+  char *data(size_t message_data_size) {
+    return RoundedData(message_data_size);
+  }
+  const char *data(size_t message_data_size) const {
+    return RoundedData(message_data_size);
+  }
+
+  // Returns the pre-buffer redzone, given that message_data_size is the same
+  // one used to allocate this message's memory.
+  absl::Span<char> PreRedzone(size_t message_data_size) {
+    char *const end = data(message_data_size);
+    const auto result =
+        absl::Span<char>(&data_pointer[0], end - &data_pointer[0]);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment);
+    return result;
+  }
+  absl::Span<const char> PreRedzone(size_t message_data_size) const {
+    const char *const end = data(message_data_size);
+    const auto result =
+        absl::Span<const char>(&data_pointer[0], end - &data_pointer[0]);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment);
+    return result;
+  }
+
+  // Returns the post-buffer redzone, given that message_data_size is the same
+  // one used to allocate this message's memory.
+  absl::Span<char> PostRedzone(size_t message_data_size, size_t message_size) {
+    DCHECK_LT(message_data_size, message_size);
+    char *const redzone_end = reinterpret_cast<char *>(this) + message_size;
+    char *const data_end = data(message_data_size) + message_data_size;
+    DCHECK_GT(static_cast<void *>(redzone_end), static_cast<void *>(data_end));
+    const auto result = absl::Span<char>(data_end, redzone_end - data_end);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment * 2);
+    return result;
+  }
+  absl::Span<const char> PostRedzone(size_t message_data_size,
+                                     size_t message_size) const {
+    DCHECK_LT(message_data_size, message_size);
+    const char *const redzone_end =
+        reinterpret_cast<const char *>(this) + message_size;
+    const char *const data_end = data(message_data_size) + message_data_size;
+    DCHECK_GT(static_cast<const void *>(redzone_end),
+              static_cast<const void *>(data_end));
+    const auto result =
+        absl::Span<const char>(data_end, redzone_end - data_end);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment * 2);
+    return result;
   }
 
  private:
-  // This returns a non-const pointer into a const object. Be very careful about
-  // const correctness in publicly accessible APIs using it.
-  char *RoundedData(size_t message_size) const {
-    return RoundChannelData(const_cast<char *>(&data_pointer[0]), message_size);
+  // This returns a non-const pointer into a const object. Be very careful
+  // about const correctness in publicly accessible APIs using it.
+  char *RoundedData(size_t message_data_size) const {
+    return RoundChannelData(
+        const_cast<char *>(&data_pointer[0] + kChannelDataRedzone),
+        message_data_size);
   }
 
   char data_pointer[];
diff --git a/aos/ipc_lib/lockless_queue_test.cc b/aos/ipc_lib/lockless_queue_test.cc
index e1e2516..6510501 100644
--- a/aos/ipc_lib/lockless_queue_test.cc
+++ b/aos/ipc_lib/lockless_queue_test.cc
@@ -317,8 +317,32 @@
   }
 }
 
+namespace {
+
+// Temporarily pins the current thread to CPUs 0 and 1.
+// This speeds up the test on some machines a lot (~4x). It also preserves
+// opportunities for the 2 threads to race each other.
+class PinForTest {
+ public:
+  PinForTest() {
+    PCHECK(sched_getaffinity(0, sizeof(old_), &old_) == 0);
+    cpu_set_t new_set;
+    CPU_ZERO(&new_set);
+    CPU_SET(0, &new_set);
+    CPU_SET(1, &new_set);
+    PCHECK(sched_setaffinity(0, sizeof(new_set), &new_set) == 0);
+  }
+  ~PinForTest() { PCHECK(sched_setaffinity(0, sizeof(old_), &old_) == 0); }
+
+ private:
+  cpu_set_t old_;
+};
+
+}  // namespace
+
 // Send enough messages to wrap the 32 bit send counter.
 TEST_F(LocklessQueueTest, WrappedSend) {
+  PinForTest pin_cpu;
   uint64_t kNumMessages = 0x100010000ul;
   QueueRacer racer(queue(), 1, kNumMessages);