Merge "Add redzones around lockless queue data"
diff --git a/aos/events/shm_event_loop_test.cc b/aos/events/shm_event_loop_test.cc
index d9a8872..2aeefb4 100644
--- a/aos/events/shm_event_loop_test.cc
+++ b/aos/events/shm_event_loop_test.cc
@@ -288,6 +288,29 @@
   EXPECT_LT(fetcher.context().data, private_memory.end());
 }
 
+// Tests that corrupting the bytes around the data buffer results in a crash.
+TEST_P(ShmEventLoopDeathTest, OutOfBoundsWrite) {
+  auto loop1 = factory()->Make("loop1");
+  std::unique_ptr<aos::RawSender> sender =
+      loop1->MakeRawSender(configuration::GetChannel(
+          loop1->configuration(), "/test", "aos.TestMessage", "", nullptr));
+  for (size_t i = 0; i < kChannelDataRedzone; ++i) {
+    SCOPED_TRACE(std::to_string(i));
+    EXPECT_DEATH(
+        {
+          ++static_cast<char *>(sender->data())[-1 - i];
+          sender->Send(0);
+        },
+        "Somebody wrote outside the buffer of their message");
+    EXPECT_DEATH(
+        {
+          ++static_cast<char *>(sender->data())[sender->size() + i];
+          sender->Send(0);
+        },
+        "Somebody wrote outside the buffer of their message");
+  }
+}
+
 // TODO(austin): Test that missing a deadline with a timer recovers as expected.
 
 INSTANTIATE_TEST_CASE_P(ShmEventLoopCopyTest, ShmEventLoopTest,
diff --git a/aos/ipc_lib/BUILD b/aos/ipc_lib/BUILD
index 6c14200..d5add53 100644
--- a/aos/ipc_lib/BUILD
+++ b/aos/ipc_lib/BUILD
@@ -165,6 +165,7 @@
         "//aos/time",
         "//aos/util:compiler_memory_barrier",
         "@com_github_google_glog//:glog",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
diff --git a/aos/ipc_lib/data_alignment.h b/aos/ipc_lib/data_alignment.h
index 2f59b78..72a7456 100644
--- a/aos/ipc_lib/data_alignment.h
+++ b/aos/ipc_lib/data_alignment.h
@@ -36,6 +36,10 @@
   return reinterpret_cast<char *>(rounded_data);
 }
 
+// The size of the redzone we maintain outside each message's data to help
+// detect out-of-bounds writes.
+static constexpr size_t kChannelDataRedzone = 32;
+
 }  // namespace aos
 
 #endif  // AOS_IPC_LIB_DATA_ALIGNMENT_H_
diff --git a/aos/ipc_lib/lockless_queue.cc b/aos/ipc_lib/lockless_queue.cc
index 4115769..6774489 100644
--- a/aos/ipc_lib/lockless_queue.cc
+++ b/aos/ipc_lib/lockless_queue.cc
@@ -435,7 +435,11 @@
 
 size_t LocklessQueueConfiguration::message_size() const {
   // Round up the message size so following data is aligned appropriately.
+  // Make sure to leave space to align the message data. It will be aligned
+  // relative to the start of the shared memory region, but that might not be
+  // aligned for some use cases.
   return LocklessQueueMemory::AlignmentRoundUp(message_data_size +
+                                               kChannelDataRedzone * 2 +
                                                (kChannelDataAlignment - 1)) +
          sizeof(Message);
 }
@@ -468,6 +472,67 @@
   return size;
 }
 
+// Calculates the starting byte for a redzone in shared memory. This starting
+// value is simply incremented for subsequent bytes.
+//
+// The result is based on the offset of the region in shared memor, to ensure it
+// is the same for each region when we generate and verify, but different for
+// each region to help catch forms of corruption like copying out-of-bounds data
+// from one place to another.
+//
+// memory is the base pointer to the shared memory. It is used to calculated
+// offsets. starting_data is the start of the redzone's data. Each one will
+// get a unique pattern.
+uint8_t RedzoneStart(const LocklessQueueMemory *memory,
+                     const char *starting_data) {
+  const auto memory_int = reinterpret_cast<uintptr_t>(memory);
+  const auto starting_int = reinterpret_cast<uintptr_t>(starting_data);
+  DCHECK(starting_int >= memory_int);
+  DCHECK(starting_int < memory_int + LocklessQueueMemorySize(memory->config));
+  const uintptr_t starting_offset = starting_int - memory_int;
+  // Just XOR the lower 2 bytes. They higher-order bytes are probably 0
+  // anyways.
+  return (starting_offset & 0xFF) ^ ((starting_offset >> 8) & 0xFF);
+}
+
+// Returns true if the given redzone has invalid data.
+bool CheckRedzone(const LocklessQueueMemory *memory,
+                  absl::Span<const char> redzone) {
+  uint8_t redzone_value = RedzoneStart(memory, redzone.data());
+
+  bool bad = false;
+
+  for (size_t i = 0; i < redzone.size(); ++i) {
+    if (memcmp(&redzone[i], &redzone_value, 1)) {
+      bad = true;
+    }
+    ++redzone_value;
+  }
+
+  return bad;
+}
+
+// Returns true if either of message's redzones has invalid data.
+bool CheckBothRedzones(const LocklessQueueMemory *memory,
+                       const Message *message) {
+  return CheckRedzone(memory,
+                      message->PreRedzone(memory->message_data_size())) ||
+         CheckRedzone(memory, message->PostRedzone(memory->message_data_size(),
+                                                   memory->message_size()));
+}
+
+// Fills the given redzone with the expected data.
+void FillRedzone(LocklessQueueMemory *memory, absl::Span<char> redzone) {
+  uint8_t redzone_value = RedzoneStart(memory, redzone.data());
+  for (size_t i = 0; i < redzone.size(); ++i) {
+    memcpy(&redzone[i], &redzone_value, 1);
+    ++redzone_value;
+  }
+
+  // Just double check that the implementations match.
+  CHECK(!CheckRedzone(memory, redzone));
+}
+
 LocklessQueueMemory *InitializeLocklessQueueMemory(
     LocklessQueueMemory *memory, LocklessQueueConfiguration config) {
   // Everything should be zero initialized already.  So we just need to fill
@@ -534,8 +599,12 @@
     CHECK_LE(num_messages, Index::MaxMessages());
 
     for (size_t i = 0; i < num_messages; ++i) {
-      memory->GetMessage(Index(QueueIndex::Zero(memory->queue_size()), i))
-          ->header.queue_index.Invalidate();
+      Message *const message =
+          memory->GetMessage(Index(QueueIndex::Zero(memory->queue_size()), i));
+      message->header.queue_index.Invalidate();
+      FillRedzone(memory, message->PreRedzone(memory->message_data_size()));
+      FillRedzone(memory, message->PostRedzone(memory->message_data_size(),
+                                               memory->message_size()));
     }
 
     for (size_t i = 0; i < memory->queue_size(); ++i) {
@@ -866,6 +935,8 @@
   // modifying it right now.
   const Index scratch_index = sender->scratch_index.RelaxedLoad();
   Message *const message = memory_->GetMessage(scratch_index);
+  CHECK(!CheckBothRedzones(memory_, message))
+      << ": Somebody wrote outside the buffer of their message";
 
   // We should have invalidated this when we first got the buffer. Verify that
   // in debug mode.
@@ -995,6 +1066,8 @@
     break;
   }
 
+  DCHECK(!CheckBothRedzones(memory_, memory_->GetMessage(to_replace)))
+      << ": Invalid message found in shared memory";
   // to_replace is our current scratch_index. It isn't in the queue, which means
   // nobody new can pin it. They can set their `pinned` to it, but they will
   // back it out, so they don't count. This means that we just need to find a
@@ -1003,6 +1076,9 @@
   // pinned then we'll look for a new one to use instead.
   const Index new_scratch =
       SwapPinnedSenderScratch(memory_, sender, to_replace);
+  DCHECK(!CheckBothRedzones(
+      memory_, memory_->GetMessage(sender->scratch_index.RelaxedLoad())))
+      << ": Invalid message found in shared memory";
 
   // If anybody is looking at this message (they shouldn't be), then try telling
   // them about it (best-effort).
@@ -1089,6 +1165,8 @@
   {
     const Index message_index = queue_slot->Load();
     Message *const message = memory_->GetMessage(message_index);
+    DCHECK(!CheckBothRedzones(memory_, message))
+        << ": Invalid message found in shared memory";
 
     const QueueIndex message_queue_index =
         message->header.queue_index.Load(queue_size);
@@ -1141,6 +1219,8 @@
   const Message *m = memory_->GetMessage(mi);
 
   while (true) {
+    DCHECK(!CheckBothRedzones(memory_, m))
+        << ": Invalid message found in shared memory";
     // We need to confirm that the data doesn't change while we are reading it.
     // Do that by first confirming that the message points to the queue index we
     // want.
@@ -1341,6 +1421,10 @@
     ::std::cout << "        size_t length = " << m->header.length
                 << ::std::endl;
     ::std::cout << "      }" << ::std::endl;
+    if (!CheckBothRedzones(memory, m)) {
+      ::std::cout << "      // *** DATA REDZONES ARE CORRUPTED ***"
+                  << ::std::endl;
+    }
     ::std::cout << "      data: {";
 
     if (FLAGS_dump_lockless_queue_data) {
diff --git a/aos/ipc_lib/lockless_queue.h b/aos/ipc_lib/lockless_queue.h
index 3cd3726..b71bd7f 100644
--- a/aos/ipc_lib/lockless_queue.h
+++ b/aos/ipc_lib/lockless_queue.h
@@ -7,6 +7,8 @@
 #include <optional>
 #include <vector>
 
+#include "absl/types/span.h"
+
 #include "aos/ipc_lib/aos_sync.h"
 #include "aos/ipc_lib/data_alignment.h"
 #include "aos/ipc_lib/index.h"
@@ -93,16 +95,64 @@
     size_t length;
   } header;
 
-  char *data(size_t message_size) { return RoundedData(message_size); }
-  const char *data(size_t message_size) const {
-    return RoundedData(message_size);
+  // Returns the start of the data buffer, given that message_data_size is
+  // the same one used to allocate this message's memory.
+  char *data(size_t message_data_size) {
+    return RoundedData(message_data_size);
+  }
+  const char *data(size_t message_data_size) const {
+    return RoundedData(message_data_size);
+  }
+
+  // Returns the pre-buffer redzone, given that message_data_size is the same
+  // one used to allocate this message's memory.
+  absl::Span<char> PreRedzone(size_t message_data_size) {
+    char *const end = data(message_data_size);
+    const auto result =
+        absl::Span<char>(&data_pointer[0], end - &data_pointer[0]);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment);
+    return result;
+  }
+  absl::Span<const char> PreRedzone(size_t message_data_size) const {
+    const char *const end = data(message_data_size);
+    const auto result =
+        absl::Span<const char>(&data_pointer[0], end - &data_pointer[0]);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment);
+    return result;
+  }
+
+  // Returns the post-buffer redzone, given that message_data_size is the same
+  // one used to allocate this message's memory.
+  absl::Span<char> PostRedzone(size_t message_data_size, size_t message_size) {
+    DCHECK_LT(message_data_size, message_size);
+    char *const redzone_end = reinterpret_cast<char *>(this) + message_size;
+    char *const data_end = data(message_data_size) + message_data_size;
+    DCHECK_GT(static_cast<void *>(redzone_end), static_cast<void *>(data_end));
+    const auto result = absl::Span<char>(data_end, redzone_end - data_end);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment * 2);
+    return result;
+  }
+  absl::Span<const char> PostRedzone(size_t message_data_size,
+                                     size_t message_size) const {
+    DCHECK_LT(message_data_size, message_size);
+    const char *const redzone_end =
+        reinterpret_cast<const char *>(this) + message_size;
+    const char *const data_end = data(message_data_size) + message_data_size;
+    DCHECK_GT(static_cast<const void *>(redzone_end),
+              static_cast<const void *>(data_end));
+    const auto result =
+        absl::Span<const char>(data_end, redzone_end - data_end);
+    DCHECK_LT(result.size(), kChannelDataRedzone + kChannelDataAlignment * 2);
+    return result;
   }
 
  private:
-  // This returns a non-const pointer into a const object. Be very careful about
-  // const correctness in publicly accessible APIs using it.
-  char *RoundedData(size_t message_size) const {
-    return RoundChannelData(const_cast<char *>(&data_pointer[0]), message_size);
+  // This returns a non-const pointer into a const object. Be very careful
+  // about const correctness in publicly accessible APIs using it.
+  char *RoundedData(size_t message_data_size) const {
+    return RoundChannelData(
+        const_cast<char *>(&data_pointer[0] + kChannelDataRedzone),
+        message_data_size);
   }
 
   char data_pointer[];
diff --git a/aos/ipc_lib/lockless_queue_test.cc b/aos/ipc_lib/lockless_queue_test.cc
index e1e2516..6510501 100644
--- a/aos/ipc_lib/lockless_queue_test.cc
+++ b/aos/ipc_lib/lockless_queue_test.cc
@@ -317,8 +317,32 @@
   }
 }
 
+namespace {
+
+// Temporarily pins the current thread to CPUs 0 and 1.
+// This speeds up the test on some machines a lot (~4x). It also preserves
+// opportunities for the 2 threads to race each other.
+class PinForTest {
+ public:
+  PinForTest() {
+    PCHECK(sched_getaffinity(0, sizeof(old_), &old_) == 0);
+    cpu_set_t new_set;
+    CPU_ZERO(&new_set);
+    CPU_SET(0, &new_set);
+    CPU_SET(1, &new_set);
+    PCHECK(sched_setaffinity(0, sizeof(new_set), &new_set) == 0);
+  }
+  ~PinForTest() { PCHECK(sched_setaffinity(0, sizeof(old_), &old_) == 0); }
+
+ private:
+  cpu_set_t old_;
+};
+
+}  // namespace
+
 // Send enough messages to wrap the 32 bit send counter.
 TEST_F(LocklessQueueTest, WrappedSend) {
+  PinForTest pin_cpu;
   uint64_t kNumMessages = 0x100010000ul;
   QueueRacer racer(queue(), 1, kNumMessages);