blob: 4b170d313958598b024027ac7ea1be965da1a5bf [file] [log] [blame]
Alexei Strots01395492023-03-20 13:59:56 -07001#include "aos/events/logging/log_backend.h"
2
3#include <dirent.h>
4
5#include <filesystem>
6
7#include "absl/strings/str_cat.h"
8#include "aos/util/file.h"
9#include "glog/logging.h"
10
11DEFINE_bool(direct, false,
12 "If true, write using O_DIRECT and write 512 byte aligned blocks "
13 "whenever possible.");
Austin Schuh3ebaf782023-04-07 16:03:28 -070014DEFINE_bool(
15 sync, false,
16 "If true, sync data to disk as we go so we don't get too far ahead. Also "
17 "fadvise that we are done with the memory once it hits disk.");
Alexei Strots01395492023-03-20 13:59:56 -070018
19namespace aos::logger {
20namespace {
21constexpr const char *kTempExtension = ".tmp";
22}
23
24FileHandler::FileHandler(std::string filename)
25 : filename_(std::move(filename)), supports_odirect_(FLAGS_direct) {}
26
27FileHandler::~FileHandler() { Close(); }
28
29WriteCode FileHandler::OpenForWrite() {
30 iovec_.reserve(10);
31 if (!aos::util::MkdirPIfSpace(filename_, 0777)) {
32 return WriteCode::kOutOfSpace;
33 } else {
34 fd_ = open(filename_.c_str(), O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
35 if (fd_ == -1 && errno == ENOSPC) {
36 return WriteCode::kOutOfSpace;
37 } else {
38 PCHECK(fd_ != -1) << ": Failed to open " << filename_ << " for writing";
39 VLOG(1) << "Opened " << filename_ << " for writing";
40 }
41
42 flags_ = fcntl(fd_, F_GETFL, 0);
43 PCHECK(flags_ >= 0) << ": Failed to get flags for " << this->filename();
44
45 EnableDirect();
46
47 CHECK(std::filesystem::exists(filename_));
48
49 return WriteCode::kOk;
50 }
51}
52
53void FileHandler::EnableDirect() {
54 if (supports_odirect_ && !ODirectEnabled()) {
55 const int new_flags = flags_ | O_DIRECT;
56 // Track if we failed to set O_DIRECT. Note: Austin hasn't seen this call
57 // fail. The write call tends to fail instead.
58 if (fcntl(fd_, F_SETFL, new_flags) == -1) {
59 PLOG(WARNING) << "Failed to set O_DIRECT on " << filename();
60 supports_odirect_ = false;
61 } else {
62 VLOG(1) << "Enabled O_DIRECT on " << filename();
63 flags_ = new_flags;
64 }
65 }
66}
67
68void FileHandler::DisableDirect() {
69 if (supports_odirect_ && ODirectEnabled()) {
70 flags_ = flags_ & (~O_DIRECT);
71 PCHECK(fcntl(fd_, F_SETFL, flags_) != -1) << ": Failed to disable O_DIRECT";
72 VLOG(1) << "Disabled O_DIRECT on " << filename();
73 }
74}
75
76WriteResult FileHandler::Write(
77 const absl::Span<const absl::Span<const uint8_t>> &queue) {
78 iovec_.clear();
Austin Schuh3ebaf782023-04-07 16:03:28 -070079 CHECK_LE(queue.size(), static_cast<size_t>(IOV_MAX));
80 iovec_.resize(queue.size());
81 // Size of the data currently in iovec_.
Alexei Strots01395492023-03-20 13:59:56 -070082 size_t counted_size = 0;
83
84 // Ok, we now need to figure out if we were aligned, and if we were, how much
85 // of the data we are being asked to write is aligned.
86 //
Austin Schuh3ebaf782023-04-07 16:03:28 -070087 // When writing with O_DIRECT, the kernel only will accept writes where the
88 // offset into the file is a multiple of kSector, the data is aligned to
89 // kSector in memory, and the length being written is a multiple of kSector.
90 // Some of the callers use an aligned ResizeableBuffer to generate 512 byte
91 // aligned buffers for this code to find and use.
Alexei Strots01395492023-03-20 13:59:56 -070092 bool aligned = (total_write_bytes_ % kSector) == 0;
Austin Schuh3ebaf782023-04-07 16:03:28 -070093
94 // Index we are filling in next. Keeps resetting back to 0 as we write
95 // intermediates.
Alexei Strots01395492023-03-20 13:59:56 -070096 size_t write_index = 0;
Austin Schuh3ebaf782023-04-07 16:03:28 -070097 for (size_t i = 0; i < queue.size(); ++i) {
Alexei Strots01395492023-03-20 13:59:56 -070098 iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data());
Austin Schuh3ebaf782023-04-07 16:03:28 -070099 iovec_[write_index].iov_len = queue[i].size();
Alexei Strots01395492023-03-20 13:59:56 -0700100
101 // Make sure the address is aligned, or give up. This should be uncommon,
102 // but is always possible.
Austin Schuh3ebaf782023-04-07 16:03:28 -0700103 if ((reinterpret_cast<size_t>(iovec_[write_index].iov_base) % kSector) !=
104 0) {
105 // Flush if we were aligned and have data.
106 if (aligned && write_index != 0) {
107 VLOG(1) << "Was aligned, now is not, writing previous data";
108 const auto code =
109 WriteV(iovec_.data(), write_index, true, counted_size);
110 if (code == WriteCode::kOutOfSpace) {
111 return {
112 .code = code,
113 .messages_written = i,
114 };
115 }
116
117 // Now, everything before here has been written. Make an iovec out of
118 // the rest and keep going.
119 write_index = 0;
120 counted_size = 0;
121
122 iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data());
123 iovec_[write_index].iov_len = queue[i].size();
124 }
Alexei Strots01395492023-03-20 13:59:56 -0700125 aligned = false;
Austin Schuh3ebaf782023-04-07 16:03:28 -0700126 } else {
127 // We are now starting aligned again, and have data worth writing! Flush
128 // what was there before.
129 if (!aligned && iovec_[write_index].iov_len >= kSector &&
130 write_index != 0) {
131 VLOG(1) << "Was not aligned, now is, writing previous data";
132
133 const auto code =
134 WriteV(iovec_.data(), write_index, false, counted_size);
135 if (code == WriteCode::kOutOfSpace) {
136 return {
137 .code = code,
138 .messages_written = i,
139 };
140 }
141
142 // Now, everything before here has been written. Make an iovec out of
143 // the rest and keep going.
144 write_index = 0;
145 counted_size = 0;
146
147 iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data());
148 iovec_[write_index].iov_len = queue[i].size();
149 aligned = true;
150 }
Alexei Strots01395492023-03-20 13:59:56 -0700151 }
152
153 // Now, see if the length is a multiple of kSector. The goal is to figure
154 // out if/how much memory we can write out with O_DIRECT so that only the
155 // last little bit is done with non-direct IO to keep it fast.
Alexei Strots01395492023-03-20 13:59:56 -0700156 if ((iovec_[write_index].iov_len % kSector) != 0) {
Austin Schuh3ebaf782023-04-07 16:03:28 -0700157 VLOG(1) << "Unaligned length " << iovec_[write_index].iov_len << " on "
158 << filename();
Alexei Strots01395492023-03-20 13:59:56 -0700159 // If we've got over a sector of data to write, write it out with O_DIRECT
160 // and then continue writing the rest unaligned.
161 if (aligned && iovec_[write_index].iov_len > kSector) {
162 const size_t aligned_size =
163 iovec_[write_index].iov_len & (~(kSector - 1));
164 VLOG(1) << "Was aligned, writing last chunk rounded from "
165 << queue[i].size() << " to " << aligned_size;
166 iovec_[write_index].iov_len = aligned_size;
167
168 const auto code =
Austin Schuh3ebaf782023-04-07 16:03:28 -0700169 WriteV(iovec_.data(), write_index + 1, true, counted_size + aligned_size);
Alexei Strots01395492023-03-20 13:59:56 -0700170 if (code == WriteCode::kOutOfSpace) {
171 return {
172 .code = code,
173 .messages_written = i,
174 };
175 }
176
177 // Now, everything before here has been written. Make an iovec out of
Austin Schuh3ebaf782023-04-07 16:03:28 -0700178 // the rest and keep going.
Alexei Strots01395492023-03-20 13:59:56 -0700179 write_index = 0;
180 counted_size = 0;
181
182 iovec_[write_index].iov_base =
183 const_cast<uint8_t *>(queue[i].data() + aligned_size);
184 iovec_[write_index].iov_len = queue[i].size() - aligned_size;
185 }
186 aligned = false;
187 }
188 VLOG(1) << "Writing " << iovec_[write_index].iov_len << " to "
189 << filename();
190 counted_size += iovec_[write_index].iov_len;
191 ++write_index;
192 }
193
194 // Either write the aligned data if it is all aligned, or write the rest
195 // unaligned if we wrote aligned up above.
Austin Schuh3ebaf782023-04-07 16:03:28 -0700196 const auto code = WriteV(iovec_.data(), write_index, aligned, counted_size);
Alexei Strots01395492023-03-20 13:59:56 -0700197 return {
198 .code = code,
Austin Schuh3ebaf782023-04-07 16:03:28 -0700199 .messages_written = queue.size(),
Alexei Strots01395492023-03-20 13:59:56 -0700200 };
201}
202
203WriteCode FileHandler::WriteV(struct iovec *iovec_data, size_t iovec_size,
204 bool aligned, size_t counted_size) {
205 // Configure the file descriptor to match the mode we should be in. This is
206 // safe to over-call since it only does the syscall if needed.
207 if (aligned) {
208 EnableDirect();
209 } else {
210 DisableDirect();
211 }
212
213 const auto start = aos::monotonic_clock::now();
Alexei Strots01395492023-03-20 13:59:56 -0700214
Austin Schuh3ebaf782023-04-07 16:03:28 -0700215 if (VLOG_IS_ON(2)) {
216 size_t to_be_written = 0;
217 for (size_t i = 0; i < iovec_size; ++i) {
218 VLOG(2) << " iov_base " << static_cast<void *>(iovec_data[i].iov_base)
219 << ", iov_len " << iovec_data[i].iov_len;
220 to_be_written += iovec_data[i].iov_len;
221 }
222 CHECK_GT(to_be_written, 0u);
223 VLOG(2) << "Going to write " << to_be_written;
224 }
225
226 const ssize_t written = writev(fd_, iovec_data, iovec_size);
227 VLOG(2) << "Wrote " << written << ", for iovec size " << iovec_size;
228
229 if (FLAGS_sync && written > 0) {
Alexei Strots01395492023-03-20 13:59:56 -0700230 // Flush asynchronously and force the data out of the cache.
231 sync_file_range(fd_, total_write_bytes_, written, SYNC_FILE_RANGE_WRITE);
232 if (last_synced_bytes_ != 0) {
233 // Per Linus' recommendation online on how to do fast file IO, do a
234 // blocking flush of the previous write chunk, and then tell the kernel to
235 // drop the pages from the cache. This makes sure we can't get too far
236 // ahead.
237 sync_file_range(fd_, last_synced_bytes_,
238 total_write_bytes_ - last_synced_bytes_,
239 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
240 SYNC_FILE_RANGE_WAIT_AFTER);
241 posix_fadvise(fd_, last_synced_bytes_,
242 total_write_bytes_ - last_synced_bytes_,
243 POSIX_FADV_DONTNEED);
244
Alexei Strots01395492023-03-20 13:59:56 -0700245 }
Austin Schuh3c4029f2023-04-13 12:09:55 -0700246 last_synced_bytes_ = total_write_bytes_;
Alexei Strots01395492023-03-20 13:59:56 -0700247 }
248
249 const auto end = aos::monotonic_clock::now();
250 if (written == -1 && errno == ENOSPC) {
251 return WriteCode::kOutOfSpace;
252 }
253 PCHECK(written >= 0) << ": write failed, got " << written;
254 if (written < static_cast<ssize_t>(counted_size)) {
255 // Sometimes this happens instead of ENOSPC. On a real filesystem, this
256 // never seems to happen in any other case. If we ever want to log to a
257 // socket, this will happen more often. However, until we get there, we'll
258 // just assume it means we ran out of space.
259 return WriteCode::kOutOfSpace;
260 }
261
262 total_write_bytes_ += written;
263 write_stats_.UpdateStats(end - start, written, iovec_size);
264 return WriteCode::kOk;
265}
266
267WriteCode FileHandler::Close() {
268 if (!is_open()) {
269 return WriteCode::kOk;
270 }
271 bool ran_out_of_space = false;
272 if (close(fd_) == -1) {
273 if (errno == ENOSPC) {
274 ran_out_of_space = true;
275 } else {
276 PLOG(ERROR) << "Closing log file failed";
277 }
278 }
279 fd_ = -1;
280 VLOG(1) << "Closed " << filename_;
281 return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk;
282}
283
284FileBackend::FileBackend(std::string_view base_name)
285 : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {}
286
287std::unique_ptr<FileHandler> FileBackend::RequestFile(std::string_view id) {
288 const std::string filename = absl::StrCat(base_name_, separator_, id);
289 return std::make_unique<FileHandler>(filename);
290}
291
292RenamableFileBackend::RenamableFileBackend(std::string_view base_name)
293 : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {}
294
295std::unique_ptr<FileHandler> RenamableFileBackend::RequestFile(
296 std::string_view id) {
297 const std::string filename =
298 absl::StrCat(base_name_, separator_, id, temp_suffix_);
299 return std::make_unique<RenamableFileHandler>(this, filename);
300}
301
302void RenamableFileBackend::EnableTempFiles() {
303 use_temp_files_ = true;
304 temp_suffix_ = kTempExtension;
305}
306
307bool RenamableFileBackend::RenameLogBase(std::string_view new_base_name) {
308 if (new_base_name == base_name_) {
309 return true;
310 }
311 CHECK(old_base_name_.empty())
312 << "Only one change of base_name is supported. Was: " << old_base_name_;
313
314 std::string current_directory = base_name_;
315 std::string new_directory(new_base_name);
316
317 auto current_path_split = current_directory.rfind("/");
318 CHECK(current_path_split != std::string::npos)
319 << "Could not find / in the current directory path";
320 auto new_path_split = new_directory.rfind("/");
321 CHECK(new_path_split != std::string::npos)
322 << "Could not find / in the new directory path";
323
324 CHECK(new_base_name.substr(new_path_split) ==
325 current_directory.substr(current_path_split))
326 << "Rename of file base from " << current_directory << " to "
327 << new_directory << " is not supported.";
328
329 current_directory.resize(current_path_split);
330 new_directory.resize(new_path_split);
331 DIR *dir = opendir(current_directory.c_str());
332 if (dir) {
333 closedir(dir);
334 const int result = rename(current_directory.c_str(), new_directory.c_str());
335 if (result != 0) {
336 PLOG(ERROR) << "Unable to rename " << current_directory << " to "
337 << new_directory;
338 return false;
339 }
340 } else {
341 // Handle if directory was already renamed.
342 dir = opendir(new_directory.c_str());
343 if (!dir) {
344 LOG(ERROR) << "Old directory " << current_directory
345 << " missing and new directory " << new_directory
346 << " not present.";
347 return false;
348 }
349 closedir(dir);
350 }
351 old_base_name_ = base_name_;
352 base_name_ = std::string(new_base_name);
353 separator_ = base_name_.back() == '/' ? "" : "_";
354 return true;
355}
356
357WriteCode RenamableFileBackend::RenameFileAfterClose(
358 std::string_view filename) {
359 // Fast check that we can skip rename.
360 if (!use_temp_files_ && old_base_name_.empty()) {
361 return WriteCode::kOk;
362 }
363
364 std::string current_filename(filename);
365
366 // When changing the base name, we rename the log folder while there active
367 // buffer writers. Therefore, the name of that active buffer may still refer
368 // to the old file location rather than the new one.
369 if (!old_base_name_.empty()) {
370 auto offset = current_filename.find(old_base_name_);
371 if (offset != std::string::npos) {
372 current_filename.replace(offset, old_base_name_.length(), base_name_);
373 }
374 }
375
376 std::string final_filename = current_filename;
377 if (use_temp_files_) {
378 CHECK(current_filename.size() > temp_suffix_.size());
379 final_filename = current_filename.substr(
380 0, current_filename.size() - temp_suffix_.size());
381 }
382
383 int result = rename(current_filename.c_str(), final_filename.c_str());
384
385 bool ran_out_of_space = false;
386 if (result != 0) {
387 if (errno == ENOSPC) {
388 ran_out_of_space = true;
389 } else {
390 PLOG(FATAL) << "Renaming " << current_filename << " to " << final_filename
391 << " failed";
392 }
393 } else {
394 VLOG(1) << "Renamed " << current_filename << " -> " << final_filename;
395 }
396 return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk;
397}
398
399WriteCode RenamableFileBackend::RenamableFileHandler::Close() {
400 if (!is_open()) {
401 return WriteCode::kOk;
402 }
403 if (FileHandler::Close() == WriteCode::kOutOfSpace) {
404 return WriteCode::kOutOfSpace;
405 }
406 if (owner_->RenameFileAfterClose(filename()) == WriteCode::kOutOfSpace) {
407 return WriteCode::kOutOfSpace;
408 }
409 return WriteCode::kOk;
410}
Austin Schuh3ebaf782023-04-07 16:03:28 -0700411} // namespace aos::logger