Have Application class watch for binary file changes
It would be nice to have a mode for `aos_starter` that lets you readily
restart all changed applications. It's also good to know if the
applications running are the ones on disk. Track whether the file has
changed since we forked the application (as implemented, I have an
"unknown" state for if the file changed while forking--it should be
possible theoretically to detect this, but I don't want to deal with the
complexity, and it seems generally unlikely for the file to have changed
in the short window while it is fork'ing).
Change-Id: I55af6e2b9704fef328d90f42c1273c78c2b8268c
Signed-off-by: James Kuszmaul <jabukuszmaul+collab@gmail.com>
diff --git a/aos/starter/starter.fbs b/aos/starter/starter.fbs
index d2cd573..168b776 100644
--- a/aos/starter/starter.fbs
+++ b/aos/starter/starter.fbs
@@ -50,6 +50,27 @@
SET_GRP_ERR
}
+// Used to indicate the current state of the file being executed, so that we
+// can track which executables may need to be restarted.
+enum FileState : short {
+ // We are not currently in the RUNNING state, so we don't have anything to
+ // say. This will be set when the process is STARTING and STOPPING as well,
+ // because it may be in a transient state and we don't want to accidentally
+ // send out false positives regarding file changes.
+ NOT_RUNNING,
+
+ // The currently executing file is the one on disk.
+ NO_CHANGE,
+
+ // The file changed at some point between the fork() call and when we
+ // entered the RUNNING state, and we aren't currently set up to determine
+ // if the executed file is the one currently on disk.
+ CHANGED_DURING_STARTUP,
+
+ // The file has changed since it started RUNNING.
+ CHANGED,
+}
+
table Status {
statuses: [ApplicationStatus] (id: 0);
}
@@ -88,6 +109,10 @@
// specified for the starterd application (defaults to 1 Hz, can be overridden
// by --timing_report_ms).
has_active_timing_report: bool (id: 8);
+
+ // Current state of the running executable. This can be used to detect if
+ // a process should be restarted because the file has changed on disk.
+ file_state: FileState (id: 9);
}
root_type Status;
diff --git a/aos/starter/starter_test.cc b/aos/starter/starter_test.cc
index cbb83f6..e68cf05 100644
--- a/aos/starter/starter_test.cc
+++ b/aos/starter/starter_test.cc
@@ -349,7 +349,7 @@
// it grabbed the name after the fork() but before the execvp(). To
// protect against that, wait an extra cycle. If things aren't fixed by
// the second cycle, then that is a problem.
- if (pong_running_count < 2) {
+ if (pong_running_count < 3) {
return;
}
ASSERT_TRUE(pong_app_status->has_process_info());
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
index 4d21c61..b6a20f0 100644
--- a/aos/starter/subprocess.cc
+++ b/aos/starter/subprocess.cc
@@ -6,6 +6,7 @@
#include <sys/types.h>
#include <sys/wait.h>
+#include "absl/strings/str_split.h"
#include "glog/logging.h"
#include "aos/flatbuffer_merge.h"
@@ -31,6 +32,53 @@
sigset_t old_mask_;
};
+namespace {
+std::optional<ino_t> GetInodeForPath(const std::filesystem::path &path) {
+ struct stat stat_buf;
+ if (0 != stat(path.c_str(), &stat_buf)) {
+ return std::nullopt;
+ }
+ return stat_buf.st_ino;
+}
+bool InodeChanged(const std::filesystem::path &path, ino_t previous_inode) {
+ const std::optional<ino_t> current_inode = GetInodeForPath(path);
+ if (!current_inode.has_value()) {
+ return true;
+ }
+ return current_inode.value() != previous_inode;
+}
+} // namespace
+
+std::filesystem::path ResolvePath(std::string_view command) {
+ std::filesystem::path command_path = command;
+ if (command.find("/") != std::string_view::npos) {
+ CHECK(std::filesystem::exists(command_path))
+ << ": " << command << " does not exist.";
+ return std::filesystem::canonical(command_path);
+ }
+ const char *system_path = getenv("PATH");
+ std::string system_path_buffer;
+ if (system_path == nullptr) {
+ const size_t default_path_length = confstr(_CS_PATH, nullptr, 0);
+ PCHECK(default_path_length != 0) << ": Unable to resolve " << command;
+ system_path_buffer.resize(default_path_length);
+ confstr(_CS_PATH, system_path_buffer.data(), system_path_buffer.size());
+ system_path = system_path_buffer.c_str();
+ VLOG(2) << "Using default path of " << system_path
+ << " in the absence of PATH being set.";
+ }
+ const std::vector<std::string_view> search_paths =
+ absl::StrSplit(system_path, ':');
+ for (const std::string_view search_path : search_paths) {
+ const std::filesystem::path candidate =
+ std::filesystem::path(search_path) / command_path;
+ if (std::filesystem::exists(candidate)) {
+ return std::filesystem::canonical(candidate);
+ }
+ }
+ LOG(FATAL) << "Unable to resolve " << command;
+}
+
// RAII class to become root and restore back to the original user and group
// afterwards.
class Sudo {
@@ -150,12 +198,24 @@
std::function<void()> on_change,
QuietLogging quiet_flag)
: name_(name),
- path_(executable_name),
+ path_(ResolvePath(executable_name)),
event_loop_(event_loop),
start_timer_(event_loop_->AddTimer([this] {
status_ = aos::starter::State::RUNNING;
LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
<< "Started '" << name_ << "' pid: " << pid_;
+ // Check if the file on disk changed while we were starting up. We allow
+ // this state for the same reason that we don't just use /proc/$pid/exe
+ // to determine if the file is deleted--we may be running a script or
+ // sudo or the such and determining the state of the file that we
+ // actually care about sounds like more work than we want to deal with.
+ if (InodeChanged(path_, pre_fork_inode_)) {
+ file_state_ = FileState::CHANGED_DURING_STARTUP;
+ } else {
+ file_state_ = FileState::NO_CHANGE;
+ }
+
+ OnChange();
})),
restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
stop_timer_(event_loop_->AddTimer([this] {
@@ -237,7 +297,12 @@
// particular, the exit handler for shm_event_loop could be called here if
// we don't exec() quickly enough.
ScopedCompleteSignalBlocker signal_blocker;
-
+ {
+ const std::optional<ino_t> inode = GetInodeForPath(path_);
+ CHECK(inode.has_value())
+ << ": " << path_ << " does not seem to be stat'able.";
+ pre_fork_inode_ = inode.value();
+ }
const pid_t pid = fork();
if (pid != 0) {
@@ -352,7 +417,7 @@
if (run_as_sudo_) {
// For sudo we must supply the actual path
- args_.insert(args_.begin(), path_);
+ args_.insert(args_.begin(), path_.c_str());
args_.insert(args_.begin(), kSudo);
} else {
// argv[0] should be the program name
@@ -415,6 +480,7 @@
switch (status_) {
case aos::starter::State::STARTING:
case aos::starter::State::RUNNING: {
+ file_state_ = FileState::NOT_RUNNING;
LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
quiet_flag_ == QuietLogging::kNotForDebugging)
<< "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
@@ -526,9 +592,32 @@
}
}
+FileState Application::UpdateFileState() {
+ // On every call, check if a different file is present on disk. Note that
+ // while the applications is running, the file cannot be changed without the
+ // inode changing.
+ // We could presumably use inotify or the such to watch the file instead,
+ // but this works and we do not expect substantial cost from reading the inode
+ // of a file every time we send out a status message.
+ if (InodeChanged(path_, pre_fork_inode_)) {
+ switch (file_state_) {
+ case FileState::NO_CHANGE:
+ file_state_ = FileState::CHANGED;
+ break;
+ case FileState::NOT_RUNNING:
+ case FileState::CHANGED_DURING_STARTUP:
+ case FileState::CHANGED:
+ break;
+ }
+ }
+ return file_state_;
+}
+
flatbuffers::Offset<aos::starter::ApplicationStatus>
Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
util::Top *top) {
+ UpdateFileState();
+
CHECK_NOTNULL(builder);
auto name_fbs = builder->CreateString(name_);
@@ -557,6 +646,7 @@
// Note that even if process_info is null, calling add_process_info is fine.
status_builder.add_process_info(process_info);
status_builder.add_last_start_time(start_time_.time_since_epoch().count());
+ status_builder.add_file_state(file_state_);
return status_builder.Finish();
}
@@ -629,6 +719,7 @@
start_timer_->Disable();
exit_time_ = event_loop_->monotonic_now();
exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
+ file_state_ = FileState::NOT_RUNNING;
if (auto read_result = status_pipes_.read->Read()) {
stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
index 9630e00..ca7ef9d 100644
--- a/aos/starter/subprocess.h
+++ b/aos/starter/subprocess.h
@@ -1,6 +1,7 @@
#ifndef AOS_STARTER_SUBPROCESS_H_
#define AOS_STARTER_SUBPROCESS_H_
+#include <filesystem>
#include <memory>
#include <string>
#include <tuple>
@@ -15,6 +16,15 @@
namespace aos::starter {
+// Replicates the path resolution that will be attempted by the shell or
+// commands like execvp. Doing this manually allows us to conveniently know what
+// is actually being executed (rather than, e.g., querying /proc/$pid/exe after
+// the execvp() call is executed).
+// This is also useful when using the below class with sudo or bash scripts,
+// because in those circumstances /proc/$pid/exe contains sudo and /bin/bash (or
+// similar binary), rather than the actual thing being executed.
+std::filesystem::path ResolvePath(std::string_view command);
+
// Registers a signalfd listener with the given event loop and calls callback
// whenever a signal is received.
class SignalListener {
@@ -165,6 +175,8 @@
void ObserveTimingReport(const aos::monotonic_clock::time_point send_time,
const aos::timing::Report *msg);
+ FileState UpdateFileState();
+
private:
typedef aos::util::ScopedPipe::PipePair PipePair;
@@ -201,7 +213,10 @@
static inline uint64_t next_id_ = 0;
std::string name_;
- std::string path_;
+ std::filesystem::path path_;
+ // Inode of path_ immediately prior to the most recent fork() call.
+ ino_t pre_fork_inode_;
+ FileState file_state_ = FileState::NOT_RUNNING;
std::vector<std::string> args_;
std::string user_name_;
std::optional<uid_t> user_;
diff --git a/aos/starter/subprocess_test.cc b/aos/starter/subprocess_test.cc
index cab22b4..12f7f6d 100644
--- a/aos/starter/subprocess_test.cc
+++ b/aos/starter/subprocess_test.cc
@@ -3,6 +3,8 @@
#include <signal.h>
#include <sys/types.h>
+#include "absl/strings/str_join.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "aos/events/shm_event_loop.h"
@@ -225,4 +227,169 @@
event_loop.Run();
}
+
+// Test that if the binary changes out from under us that we note it in the
+// FileState.
+TEST_F(SubprocessTest, ChangeBinaryContents) {
+ const std::string config_file =
+ ::aos::testing::ArtifactPath("aos/events/pingpong_config.json");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(config_file);
+ aos::ShmEventLoop event_loop(&config.message());
+
+ // Create a local copy of the sleep binary so that we can delete it.
+ const std::filesystem::path full_executable_path =
+ absl::StrCat(aos::testing::TestTmpDir(), "/", "sleep_binary");
+ aos::util::WriteStringToFileOrDie(
+ full_executable_path.native(),
+ aos::util::ReadFileToStringOrDie(ResolvePath("sleep").native()), S_IRWXU);
+
+ const std::filesystem::path executable_name =
+ absl::StrCat(aos::testing::TestTmpDir(), "/", "sleep_symlink");
+ // Create a symlink that points to the actual binary, and test that a
+ // Creating a symlink in particular lets us ensure that our logic actually
+ // pays attention to the target file that we are running rather than the
+ // symlink itself (it also saves us having to cp a binary somewhere where we
+ // can overwrite it).
+ std::filesystem::create_symlink(full_executable_path.native(),
+ executable_name);
+
+ // Wait until we are running, go through and test that various variations in
+ // file state result in the expected behavior, and then exit.
+ Application sleep(
+ "sleep", executable_name.native(), &event_loop,
+ [&sleep, &event_loop, executable_name, full_executable_path]() {
+ switch (sleep.status()) {
+ case aos::starter::State::RUNNING:
+ EXPECT_EQ(aos::starter::FileState::NO_CHANGE,
+ sleep.UpdateFileState());
+ // Delete the symlink; this should have no effect, because the
+ // Application class should be looking at the original path.
+ std::filesystem::remove(executable_name);
+ EXPECT_EQ(aos::starter::FileState::NO_CHANGE,
+ sleep.UpdateFileState());
+ // Delete the executable; it should be changed.
+ std::filesystem::remove(full_executable_path);
+ EXPECT_EQ(aos::starter::FileState::CHANGED,
+ sleep.UpdateFileState());
+ // Replace the executable itself; it should be changed.
+ aos::util::WriteStringToFileOrDie(full_executable_path.native(),
+ "abcdef");
+ EXPECT_EQ(aos::starter::FileState::CHANGED,
+ sleep.UpdateFileState());
+ // Terminate.
+ event_loop.Exit();
+ break;
+ case aos::starter::State::WAITING:
+ case aos::starter::State::STARTING:
+ case aos::starter::State::STOPPING:
+ case aos::starter::State::STOPPED:
+ EXPECT_EQ(aos::starter::FileState::NOT_RUNNING,
+ sleep.UpdateFileState());
+ break;
+ }
+ });
+ ASSERT_FALSE(sleep.autorestart());
+ // Ensure that the subprocess will run longer than we care about (we just call
+ // Terminate() below to stop it).
+ sleep.set_args({"1000"});
+
+ sleep.Start();
+ aos::TimerHandler *exit_timer = event_loop.AddTimer([&event_loop]() {
+ event_loop.Exit();
+ FAIL() << "We should have already exited.";
+ });
+ event_loop.OnRun([&event_loop, exit_timer]() {
+ exit_timer->Schedule(event_loop.monotonic_now() +
+ std::chrono::milliseconds(5000));
+ });
+
+ event_loop.Run();
+ sleep.Terminate();
+}
+
+class ResolvePathTest : public ::testing::Test {
+ protected:
+ ResolvePathTest() {
+ // Before doing anything else,
+ if (getenv("PATH") != nullptr) {
+ original_path_ = getenv("PATH");
+ PCHECK(0 == unsetenv("PATH"));
+ }
+ }
+
+ ~ResolvePathTest() {
+ if (!original_path_.empty()) {
+ PCHECK(0 == setenv("PATH", original_path_.c_str(), /*overwrite=*/1));
+ } else {
+ PCHECK(0 == unsetenv("PATH"));
+ }
+ }
+
+ std::filesystem::path GetLocalPath(const std::string filename) {
+ return absl::StrCat(aos::testing::TestTmpDir(), "/", filename);
+ }
+
+ std::filesystem::path CreateFile(const std::string filename) {
+ const std::filesystem::path file = GetLocalPath(filename);
+ VLOG(2) << "Creating file at " << file;
+ util::WriteStringToFileOrDie(file.native(), "contents");
+ return file;
+ }
+
+ void SetPath(const std::vector<std::string> &path) {
+ PCHECK(0 ==
+ setenv("PATH", absl::StrJoin(path, ":").c_str(), /*overwrite=*/1));
+ }
+
+ // Keep track of original PATH environment variable so that we can restore
+ // it.
+ std::string original_path_;
+};
+
+// Tests that we can resolve paths when there is no PATH environment variable.
+TEST_F(ResolvePathTest, ResolveWithUnsetPath) {
+ const std::filesystem::path local_echo = CreateFile("echo");
+ // Because the default path will be in /bin and /usr/bin (typically), we have
+ // to choose some utility that we can reasonably expect to be available in the
+ // test environment.
+ const std::filesystem::path echo_path = ResolvePath("echo");
+ EXPECT_THAT((std::vector<std::string>{"/bin/echo", "/usr/bin/echo"}),
+ ::testing::Contains(echo_path.native()));
+
+ // Test that a file with /'s in the name ignores the PATH.
+ const std::filesystem::path local_echo_path =
+ ResolvePath(local_echo.native());
+ EXPECT_EQ(local_echo_path, local_echo);
+}
+
+// Test that when the PATH environment variable is set that we can use it.
+TEST_F(ResolvePathTest, ResolveWithPath) {
+ const std::filesystem::path local_folder = GetLocalPath("bin/");
+ const std::filesystem::path local_folder2 = GetLocalPath("bin2/");
+ aos::util::MkdirP(local_folder.native(), S_IRWXU);
+ aos::util::MkdirP(local_folder2.native(), S_IRWXU);
+ SetPath({local_folder, local_folder2});
+ const std::filesystem::path binary = CreateFile("bin/binary");
+ const std::filesystem::path duplicate_binary = CreateFile("bin2/binary");
+ const std::filesystem::path other_name = CreateFile("bin2/other_name");
+
+ EXPECT_EQ(binary, ResolvePath("binary"));
+ EXPECT_EQ(other_name, ResolvePath("other_name"));
+ // And check that if we specify the full path for the duplicate binary that we
+ // can find it.
+ EXPECT_EQ(duplicate_binary, ResolvePath(duplicate_binary.native()));
+}
+
+// Test that we fail to find non-existent files.
+TEST_F(ResolvePathTest, DieOnFakeFile) {
+ // Fail to find something that searches the PATH.
+ SetPath({"foo", "bar"});
+ EXPECT_DEATH(ResolvePath("fake_file"), "Unable to resolve");
+
+ // Fail to find a local file.
+ EXPECT_DEATH(ResolvePath("./fake_file"), "./fake_file does not exist");
+}
+
} // namespace aos::starter::testing