Improve logger behavior when out of disk space
I actually tried it, and found a few places that didn't fully work.
Might still be more, but it's definitely closer now.
It'd be nice to test this stuff, but it's hard to set up a valid test.
Change-Id: If2ad1b9d91a116515215eaa49140b1aa0230fc93
diff --git a/aos/events/logging/log_namer.cc b/aos/events/logging/log_namer.cc
index 748da7c..92ee9b3 100644
--- a/aos/events/logging/log_namer.cc
+++ b/aos/events/logging/log_namer.cc
@@ -279,18 +279,30 @@
}
const std::string filename = absl::StrCat(base_name_, path, temp_suffix_);
if (!destination->get()) {
- all_filenames_.emplace_back(path);
+ if (ran_out_of_space_) {
+ *destination = std::make_unique<DetachedBufferWriter>(
+ DetachedBufferWriter::already_out_of_space_t());
+ return;
+ }
*destination =
std::make_unique<DetachedBufferWriter>(filename, encoder_factory_());
+ if (!destination->get()->ran_out_of_space()) {
+ all_filenames_.emplace_back(path);
+ }
return;
}
CloseWriter(destination);
if (ran_out_of_space_) {
+ *destination->get() =
+ DetachedBufferWriter(DetachedBufferWriter::already_out_of_space_t());
return;
}
- all_filenames_.emplace_back(path);
+
*destination->get() = DetachedBufferWriter(filename, encoder_factory_());
+ if (!destination->get()->ran_out_of_space()) {
+ all_filenames_.emplace_back(path);
+ }
}
void MultiNodeLogNamer::RenameTempFile(DetachedBufferWriter *destination) {
@@ -319,6 +331,7 @@
if (!writer) {
return;
}
+ const bool was_open = writer->is_open();
writer->Close();
if (writer->max_write_time() > max_write_time_) {
@@ -335,7 +348,12 @@
ran_out_of_space_ = true;
writer->acknowledge_out_of_space();
}
- RenameTempFile(writer);
+ if (was_open) {
+ RenameTempFile(writer);
+ } else {
+ CHECK(access(std::string(writer->filename()).c_str(), F_OK) == -1)
+ << ": File should not exist: " << writer->filename();
+ }
}
} // namespace logger
diff --git a/aos/events/logging/log_namer.h b/aos/events/logging/log_namer.h
index 6d91b69..5384ab2 100644
--- a/aos/events/logging/log_namer.h
+++ b/aos/events/logging/log_namer.h
@@ -181,7 +181,13 @@
// Besides this function, this object will silently stop logging data when
// this occurs. If you want to ensure log files are complete, you must call
// this method.
- bool ran_out_of_space() const { return ran_out_of_space_; }
+ bool ran_out_of_space() const {
+ return accumulate_data_writers<bool>(
+ ran_out_of_space_, [](bool x, const DataWriter &data_writer) {
+ return x ||
+ (data_writer.writer && data_writer.writer->ran_out_of_space());
+ });
+ }
// Returns the maximum total_bytes() value for all existing
// DetachedBufferWriters.
diff --git a/aos/events/logging/logfile_utils.cc b/aos/events/logging/logfile_utils.cc
index 70dae61..8eec23e 100644
--- a/aos/events/logging/logfile_utils.cc
+++ b/aos/events/logging/logfile_utils.cc
@@ -38,11 +38,18 @@
DetachedBufferWriter::DetachedBufferWriter(
std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
: filename_(filename), encoder_(std::move(encoder)) {
- util::MkdirP(filename, 0777);
- fd_ = open(std::string(filename).c_str(),
- O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
- VLOG(1) << "Opened " << filename << " for writing";
- PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
+ if (!util::MkdirPIfSpace(filename, 0777)) {
+ ran_out_of_space_ = true;
+ } else {
+ fd_ = open(std::string(filename).c_str(),
+ O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
+ if (fd_ == -1 && errno == ENOSPC) {
+ ran_out_of_space_ = true;
+ } else {
+ PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
+ VLOG(1) << "Opened " << filename << " for writing";
+ }
+ }
}
DetachedBufferWriter::~DetachedBufferWriter() {
@@ -79,19 +86,19 @@
}
void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
+ if (ran_out_of_space_) {
+ // We don't want any later data to be written after space becomes
+ // available, so refuse to write anything more once we've dropped data
+ // because we ran out of space.
+ VLOG(1) << "Ignoring span: " << span.size();
+ return;
+ }
+
if (encoder_->may_bypass() && span.size() > 4096u) {
// Over this threshold, we'll assume it's cheaper to add an extra
// syscall to write the data immediately instead of copying it to
// enqueue.
- if (ran_out_of_space_) {
- // We don't want any later data to be written after space becomes
- // available, so refuse to write anything more once we've dropped data
- // because we ran out of space.
- VLOG(1) << "Ignoring span: " << span.size();
- return;
- }
-
// First, flush everything.
while (encoder_->queue_size() > 0u) {
Flush();
diff --git a/aos/events/logging/logfile_utils.h b/aos/events/logging/logfile_utils.h
index 0b0c8f5..fddda72 100644
--- a/aos/events/logging/logfile_utils.h
+++ b/aos/events/logging/logfile_utils.h
@@ -42,8 +42,14 @@
// file. It encodes them, queues them up, and batches the write operation.
class DetachedBufferWriter {
public:
+ // Marker struct for one of our constructor overloads.
+ struct already_out_of_space_t {};
+
DetachedBufferWriter(std::string_view filename,
std::unique_ptr<DetachedBufferEncoder> encoder);
+ // Creates a dummy instance which won't even open a file. It will act as if
+ // opening the file ran out of space immediately.
+ DetachedBufferWriter(already_out_of_space_t) : ran_out_of_space_(true) {}
DetachedBufferWriter(DetachedBufferWriter &&other);
DetachedBufferWriter(const DetachedBufferWriter &) = delete;
@@ -54,6 +60,10 @@
std::string_view filename() const { return filename_; }
+ // This will be true until Close() is called, unless the file couldn't be
+ // created due to running out of space.
+ bool is_open() const { return fd_ != -1; }
+
// Queues up a finished FlatBufferBuilder to be encoded and written.
//
// Triggers a flush if there's enough data queued up.
@@ -64,6 +74,9 @@
}
// May steal the backing storage of buffer, or may leave it alone.
void QueueSizedFlatbuffer(flatbuffers::DetachedBuffer &&buffer) {
+ if (ran_out_of_space_) {
+ return;
+ }
encoder_->Encode(std::move(buffer));
FlushAtThreshold();
}
diff --git a/aos/util/file.cc b/aos/util/file.cc
index 089efbc..afefa86 100644
--- a/aos/util/file.cc
+++ b/aos/util/file.cc
@@ -8,7 +8,6 @@
#include <string_view>
#include "aos/scoped/scoped_fd.h"
-#include "glog/logging.h"
namespace aos {
namespace util {
@@ -48,22 +47,30 @@
}
}
-void MkdirP(std::string_view path, mode_t mode) {
+bool MkdirPIfSpace(std::string_view path, mode_t mode) {
auto last_slash_pos = path.find_last_of("/");
std::string folder(last_slash_pos == std::string_view::npos
? std::string_view("")
: path.substr(0, last_slash_pos));
- if (folder.empty()) return;
- MkdirP(folder, mode);
+ if (folder.empty()) {
+ return true;
+ }
+ if (!MkdirPIfSpace(folder, mode)) {
+ return false;
+ }
const int result = mkdir(folder.c_str(), mode);
if (result == -1 && errno == EEXIST) {
VLOG(2) << folder << " already exists";
- return;
+ return true;
+ } else if (result == -1 && errno == ENOSPC) {
+ VLOG(2) << "Out of space";
+ return false;
} else {
VLOG(1) << "Created " << folder;
}
PCHECK(result == 0) << ": Error creating " << folder;
+ return true;
}
bool PathExists(std::string_view path) {
diff --git a/aos/util/file.h b/aos/util/file.h
index 9ee2fb4..bf216bb 100644
--- a/aos/util/file.h
+++ b/aos/util/file.h
@@ -4,6 +4,8 @@
#include <string>
#include <string_view>
+#include "glog/logging.h"
+
namespace aos {
namespace util {
@@ -15,7 +17,12 @@
void WriteStringToFileOrDie(const std::string_view filename,
const std::string_view contents);
-void MkdirP(std::string_view path, mode_t mode);
+// Returns true if it succeeds or false if the filesystem is full.
+bool MkdirPIfSpace(std::string_view path, mode_t mode);
+
+inline void MkdirP(std::string_view path, mode_t mode) {
+ CHECK(MkdirPIfSpace(path, mode));
+}
bool PathExists(std::string_view path);