Force static flatbuffer memory to be aligned

For buffers which require lots of alignment, we were allocating more
space than needed and manually aligning a pointer inside that space.
That doesn't give a strong enough contract to ensure that if the
flatbuffer is written out and loaded back into RAM, it will still be
aligned well enough for the requirements.
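
Roughly, the old pattern was the following (a sketch, not the removed
code verbatim):

  // Over-allocate by alignment - 1, then round a pointer up inside the
  // vector's storage (assuming alignment is a power of two).
  std::vector<uint8_t> storage(size + alignment - 1);
  uint8_t *aligned = reinterpret_cast<uint8_t *>(
      (reinterpret_cast<uintptr_t>(storage.data()) + alignment - 1) &
      ~static_cast<uintptr_t>(alignment - 1));

The fixed-up pointer is only aligned for this one allocation; nothing
guarantees the same bytes land on an equally aligned address when they
are loaded again.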

Instead, we need to push the alignment requirement out to the
allocator, and then make sure we stay aligned inside the buffer.  The
std::vector<> allocator isn't guaranteed to produce aligned memory, so
switch everything over to the new AlignedVectorAllocator, which is.
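
As a rough sketch of the new contract, given the requested size
(illustrative only; the real implementation is AlignedVectorAllocator
below, and the constant value here is made up):

  // Storage is itself aligned to kAlignment and padded to a multiple
  // of kAlignment, and the data is handed out end-aligned inside it.
  constexpr size_t kAlignment = 64;  // Made-up value for illustration.
  const size_t padded = ((size + kAlignment - 1) / kAlignment) * kAlignment;
  uint8_t *storage = static_cast<uint8_t *>(
      ::operator new(padded, std::align_val_t(kAlignment)));
  std::span<uint8_t> allocation(storage + (padded - size), size);
  // ... later: ::operator delete(storage, std::align_val_t(kAlignment));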

Change-Id: Ice2aa1316914472f2a3d55f470a4dc957e2caa3c
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/flatbuffers/base.cc b/aos/flatbuffers/base.cc
index 97b3b36..f22fce9 100644
--- a/aos/flatbuffers/base.cc
+++ b/aos/flatbuffers/base.cc
@@ -136,39 +136,8 @@
   }
 }
 
-std::optional<std::span<uint8_t>> VectorAllocator::Allocate(
-    size_t size, size_t /*alignment*/, SetZero set_zero) {
-  CHECK(buffer_.empty()) << ": Must deallocate before calling Allocate().";
-  buffer_.resize(size);
-  if (set_zero == SetZero::kYes) {
-    memset(buffer_.data(), 0, buffer_.size());
-  }
-  return std::span<uint8_t>{buffer_.data(), buffer_.size()};
-}
-
-std::optional<std::span<uint8_t>> VectorAllocator::InsertBytes(
-    void *insertion_point, size_t bytes, size_t /*alignment*/, SetZero) {
-  const ssize_t insertion_index =
-      reinterpret_cast<uint8_t *>(insertion_point) - buffer_.data();
-  CHECK_LE(0, insertion_index);
-  CHECK_LE(insertion_index, static_cast<ssize_t>(buffer_.size()));
-  buffer_.insert(buffer_.begin() + insertion_index, bytes, 0);
-  return std::span<uint8_t>{buffer_.data(), buffer_.size()};
-}
-
-std::span<uint8_t> VectorAllocator::RemoveBytes(
-    std::span<uint8_t> remove_bytes) {
-  const ssize_t removal_index = remove_bytes.data() - buffer_.data();
-  CHECK_LE(0, removal_index);
-  CHECK_LE(removal_index, static_cast<ssize_t>(buffer_.size()));
-  CHECK_LE(removal_index + remove_bytes.size(), buffer_.size());
-  buffer_.erase(buffer_.begin() + removal_index,
-                buffer_.begin() + removal_index + remove_bytes.size());
-  return {buffer_.data(), buffer_.size()};
-}
-
 std::optional<std::span<uint8_t>> SpanAllocator::Allocate(size_t size,
-                                                          size_t /*alignment*/,
+                                                          size_t alignment,
                                                           SetZero set_zero) {
   CHECK(!allocated_);
   if (size > buffer_.size()) {
@@ -179,6 +148,10 @@
   }
   allocated_size_ = size;
   allocated_ = true;
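+  // The data is handed out end-aligned within the span, so the span's length
+  // must be a multiple of the requested alignment.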
+  CHECK_GT(alignment, 0u);
+  CHECK_EQ(buffer_.size() % alignment, 0u)
+      << ": Buffer size " << buffer_.size()
+      << " is not a multiple of the alignment " << alignment;
   return internal::GetSubSpan(buffer_, buffer_.size() - size);
 }
 
@@ -223,6 +196,97 @@
   allocated_ = false;
 }
 
+AlignedVectorAllocator::~AlignedVectorAllocator() {
+  CHECK(buffer_.empty())
+      << ": Must deallocate before destroying the AlignedVectorAllocator.";
+}
+
+std::optional<std::span<uint8_t>> AlignedVectorAllocator::Allocate(
+    size_t size, size_t /*alignment*/, fbs::SetZero set_zero) {
+  CHECK(buffer_.empty()) << ": Must deallocate before calling Allocate().";
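+  // Round the backing storage up to a multiple of kAlignment; the data
+  // itself is stored end-aligned within it.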
+  buffer_.resize(((size + kAlignment - 1) / kAlignment) * kAlignment);
+  allocated_size_ = size;
+  if (set_zero == fbs::SetZero::kYes) {
+    memset(buffer_.data(), 0, buffer_.size());
+  }
+
+  return std::span<uint8_t>{data(), allocated_size_};
+}
+
+std::optional<std::span<uint8_t>> AlignedVectorAllocator::InsertBytes(
+    void *insertion_point, size_t bytes, size_t /*alignment*/,
+    fbs::SetZero set_zero) {
+  DCHECK_GE(reinterpret_cast<const uint8_t *>(insertion_point), data());
+  DCHECK_LE(reinterpret_cast<const uint8_t *>(insertion_point),
+            data() + allocated_size_);
+  const size_t buffer_offset =
+      reinterpret_cast<const uint8_t *>(insertion_point) - data();
+  // TODO(austin): This has an extra memcpy in it that isn't strictly needed
+  // when we resize.  Remove it if performance is a concern.
+  const size_t absolute_buffer_offset =
+      reinterpret_cast<const uint8_t *>(insertion_point) - buffer_.data();
+  const size_t previous_size = buffer_.size();
+
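+  // Grow the buffer so it can hold the existing data plus the inserted
+  // bytes, again rounded up to a multiple of kAlignment.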
+  buffer_.resize(((allocated_size_ + bytes + kAlignment - 1) / kAlignment) *
+                 kAlignment);
+
+  // After the resize, the new space is all at the end of the buffer.  Move
+  // the data after the insertion point to the end, and the data before it to
+  // the new start of the allocation, leaving the gap for the inserted bytes
+  // in between.
+
+  const size_t new_space_after = buffer_.size() - previous_size;
+
+  // Move the data after the insertion point so it is end-aligned.  If the
+  // buffer wasn't resized, this will be a no-op.
+  memmove(buffer_.data() + absolute_buffer_offset + new_space_after,
+          buffer_.data() + absolute_buffer_offset,
+          previous_size - absolute_buffer_offset);
+
+  // Now, move the data at the front to be aligned too.
+  memmove(buffer_.data() + buffer_.size() - (allocated_size_ + bytes),
+          buffer_.data() + previous_size - allocated_size_,
+          allocated_size_ - (previous_size - absolute_buffer_offset));
+
+  if (set_zero == fbs::SetZero::kYes) {
+    memset(data() - bytes + buffer_offset, 0, bytes);
+  }
+  allocated_size_ += bytes;
+
+  return std::span<uint8_t>{data(), allocated_size_};
+}
+
+std::span<uint8_t> AlignedVectorAllocator::RemoveBytes(
+    std::span<uint8_t> remove_bytes) {
+  const ssize_t removal_index = remove_bytes.data() - buffer_.data();
+  const size_t old_start_index = buffer_.size() - allocated_size_;
+  CHECK_LE(static_cast<ssize_t>(old_start_index), removal_index);
+  CHECK_LE(removal_index, static_cast<ssize_t>(buffer_.size()));
+  CHECK_LE(removal_index + remove_bytes.size(), buffer_.size());
+  uint8_t *old_buffer_start = buffer_.data() + old_start_index;
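+  // Close the gap by shifting the data ahead of the removed span toward the
+  // end of the buffer, keeping everything end-aligned.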
+  memmove(old_buffer_start + remove_bytes.size(), old_buffer_start,
+          removal_index - old_start_index);
+  allocated_size_ -= remove_bytes.size();
+
+  return std::span<uint8_t>{data(), allocated_size_};
+}
+
+void AlignedVectorAllocator::Deallocate(std::span<uint8_t>) {
+  if (!released_) {
+    CHECK(!buffer_.empty())
+        << ": Called Deallocate() without a prior allocation.";
+  }
+  released_ = false;
+  buffer_.resize(0);
+}
+
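+// Hands ownership of the backing storage to a SharedSpanHolder so the
+// returned span stays valid for as long as any SharedSpan references it.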
+aos::SharedSpan AlignedVectorAllocator::Release() {
+  absl::Span<uint8_t> span{data(), allocated_size_};
+  std::shared_ptr<SharedSpanHolder> result = std::make_shared<SharedSpanHolder>(
+      std::move(buffer_), absl::Span<const uint8_t>());
+  result->span = span;
+  released_ = true;
+  return aos::SharedSpan(result, &(result->span));
+}
+
 namespace internal {
 std::ostream &DebugBytes(std::span<const uint8_t> span, std::ostream &os) {
   constexpr size_t kRowSize = 8u;