blob: 01345c913b81afda126fc5937a51ab3bb449b38a [file] [log] [blame]
Alexei Strots01395492023-03-20 13:59:56 -07001#include "aos/events/logging/log_backend.h"
2
3#include <dirent.h>
4
5#include <filesystem>
6
7#include "absl/strings/str_cat.h"
8#include "aos/util/file.h"
9#include "glog/logging.h"
10
11DEFINE_bool(direct, false,
12 "If true, write using O_DIRECT and write 512 byte aligned blocks "
13 "whenever possible.");
Austin Schuh3ebaf782023-04-07 16:03:28 -070014DEFINE_bool(
15 sync, false,
16 "If true, sync data to disk as we go so we don't get too far ahead. Also "
17 "fadvise that we are done with the memory once it hits disk.");
Alexei Strots01395492023-03-20 13:59:56 -070018
19namespace aos::logger {
20namespace {
21constexpr const char *kTempExtension = ".tmp";
22}
23
24FileHandler::FileHandler(std::string filename)
25 : filename_(std::move(filename)), supports_odirect_(FLAGS_direct) {}
26
27FileHandler::~FileHandler() { Close(); }
28
29WriteCode FileHandler::OpenForWrite() {
30 iovec_.reserve(10);
31 if (!aos::util::MkdirPIfSpace(filename_, 0777)) {
32 return WriteCode::kOutOfSpace;
33 } else {
34 fd_ = open(filename_.c_str(), O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
35 if (fd_ == -1 && errno == ENOSPC) {
36 return WriteCode::kOutOfSpace;
37 } else {
38 PCHECK(fd_ != -1) << ": Failed to open " << filename_ << " for writing";
39 VLOG(1) << "Opened " << filename_ << " for writing";
40 }
41
42 flags_ = fcntl(fd_, F_GETFL, 0);
43 PCHECK(flags_ >= 0) << ": Failed to get flags for " << this->filename();
44
45 EnableDirect();
46
47 CHECK(std::filesystem::exists(filename_));
48
49 return WriteCode::kOk;
50 }
51}
52
53void FileHandler::EnableDirect() {
54 if (supports_odirect_ && !ODirectEnabled()) {
55 const int new_flags = flags_ | O_DIRECT;
56 // Track if we failed to set O_DIRECT. Note: Austin hasn't seen this call
57 // fail. The write call tends to fail instead.
58 if (fcntl(fd_, F_SETFL, new_flags) == -1) {
59 PLOG(WARNING) << "Failed to set O_DIRECT on " << filename();
60 supports_odirect_ = false;
61 } else {
62 VLOG(1) << "Enabled O_DIRECT on " << filename();
63 flags_ = new_flags;
64 }
65 }
66}
67
68void FileHandler::DisableDirect() {
69 if (supports_odirect_ && ODirectEnabled()) {
70 flags_ = flags_ & (~O_DIRECT);
71 PCHECK(fcntl(fd_, F_SETFL, flags_) != -1) << ": Failed to disable O_DIRECT";
72 VLOG(1) << "Disabled O_DIRECT on " << filename();
73 }
74}
75
Austin Schuh9e8df9e2023-05-03 08:28:29 -070076inline bool IsAlignedStart(const absl::Span<const uint8_t> span) {
77 return (reinterpret_cast<size_t>(span.data()) % FileHandler::kSector) == 0;
78}
79
80inline bool IsAlignedLength(const absl::Span<const uint8_t> span) {
81 return (span.size() % FileHandler::kSector) == 0;
82}
83
84inline bool HasAtleastOneSector(const absl::Span<const uint8_t> span) {
85 return span.size() >= FileHandler::kSector;
86}
87
Alexei Strots01395492023-03-20 13:59:56 -070088WriteResult FileHandler::Write(
89 const absl::Span<const absl::Span<const uint8_t>> &queue) {
90 iovec_.clear();
Austin Schuh3ebaf782023-04-07 16:03:28 -070091 CHECK_LE(queue.size(), static_cast<size_t>(IOV_MAX));
92 iovec_.resize(queue.size());
93 // Size of the data currently in iovec_.
Alexei Strots01395492023-03-20 13:59:56 -070094 size_t counted_size = 0;
95
96 // Ok, we now need to figure out if we were aligned, and if we were, how much
97 // of the data we are being asked to write is aligned.
98 //
Austin Schuh3ebaf782023-04-07 16:03:28 -070099 // When writing with O_DIRECT, the kernel only will accept writes where the
100 // offset into the file is a multiple of kSector, the data is aligned to
101 // kSector in memory, and the length being written is a multiple of kSector.
102 // Some of the callers use an aligned ResizeableBuffer to generate 512 byte
103 // aligned buffers for this code to find and use.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700104 bool was_aligned = (total_write_bytes_ % kSector) == 0;
105
106 if (was_aligned) {
107 VLOG(1) << "Started aligned at offset " << total_write_bytes_ << " on "
108 << filename();
109 } else {
110 VLOG(1) << "Started unaligned at offset " << total_write_bytes_ << " on "
111 << filename();
112 }
Austin Schuh3ebaf782023-04-07 16:03:28 -0700113
114 // Index we are filling in next. Keeps resetting back to 0 as we write
115 // intermediates.
Alexei Strots01395492023-03-20 13:59:56 -0700116 size_t write_index = 0;
Austin Schuh3ebaf782023-04-07 16:03:28 -0700117 for (size_t i = 0; i < queue.size(); ++i) {
Alexei Strots01395492023-03-20 13:59:56 -0700118 iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data());
Austin Schuh3ebaf782023-04-07 16:03:28 -0700119 iovec_[write_index].iov_len = queue[i].size();
Alexei Strots01395492023-03-20 13:59:56 -0700120
121 // Make sure the address is aligned, or give up. This should be uncommon,
122 // but is always possible.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700123 if (!IsAlignedStart(queue[i])) {
Austin Schuh3ebaf782023-04-07 16:03:28 -0700124 // Flush if we were aligned and have data.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700125 if (was_aligned && write_index != 0) {
Austin Schuh3ebaf782023-04-07 16:03:28 -0700126 VLOG(1) << "Was aligned, now is not, writing previous data";
127 const auto code =
128 WriteV(iovec_.data(), write_index, true, counted_size);
129 if (code == WriteCode::kOutOfSpace) {
130 return {
131 .code = code,
132 .messages_written = i,
133 };
134 }
135
136 // Now, everything before here has been written. Make an iovec out of
137 // the rest and keep going.
138 write_index = 0;
139 counted_size = 0;
140
141 iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data());
142 iovec_[write_index].iov_len = queue[i].size();
143 }
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700144 was_aligned = false;
Austin Schuh3ebaf782023-04-07 16:03:28 -0700145 } else {
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700146 // We are now starting aligned again, and have data worth writing! Flush
Austin Schuh3ebaf782023-04-07 16:03:28 -0700147 // what was there before.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700148 if (!was_aligned && HasAtleastOneSector(queue[i]) &&
149 ((total_write_bytes_ + counted_size) % kSector) == 0 &&
Austin Schuh3ebaf782023-04-07 16:03:28 -0700150 write_index != 0) {
151 VLOG(1) << "Was not aligned, now is, writing previous data";
152
153 const auto code =
154 WriteV(iovec_.data(), write_index, false, counted_size);
155 if (code == WriteCode::kOutOfSpace) {
156 return {
157 .code = code,
158 .messages_written = i,
159 };
160 }
161
162 // Now, everything before here has been written. Make an iovec out of
163 // the rest and keep going.
164 write_index = 0;
165 counted_size = 0;
166
167 iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data());
168 iovec_[write_index].iov_len = queue[i].size();
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700169 was_aligned = true;
Austin Schuh3ebaf782023-04-07 16:03:28 -0700170 }
Alexei Strots01395492023-03-20 13:59:56 -0700171 }
172
173 // Now, see if the length is a multiple of kSector. The goal is to figure
174 // out if/how much memory we can write out with O_DIRECT so that only the
175 // last little bit is done with non-direct IO to keep it fast.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700176 if (!IsAlignedLength(queue[i])) {
177 VLOG(1) << "Unaligned length " << queue[i].size() << " on " << filename();
178 // If we've got over a sector of data to write, write it out with
179 // O_DIRECT and then continue writing the rest unaligned.
180 if (was_aligned) {
181 if (!HasAtleastOneSector(queue[i])) {
182 if (write_index > 0) {
183 const auto code =
184 WriteV(iovec_.data(), write_index, true, counted_size);
185 if (code == WriteCode::kOutOfSpace) {
186 return {
187 .code = code,
188 .messages_written = i,
189 };
190 }
Alexei Strots01395492023-03-20 13:59:56 -0700191
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700192 // Now, everything before here has been written. Make an iovec out
193 // of the rest and keep going.
194 write_index = 0;
195 counted_size = 0;
196
197 iovec_[write_index].iov_base =
198 const_cast<uint8_t *>(queue[i].data());
199 iovec_[write_index].iov_len = queue[i].size();
200 }
201 } else {
202 const size_t aligned_size =
203 iovec_[write_index].iov_len & (~(kSector - 1));
204 VLOG(1) << "Was aligned, writing last chunk rounded from "
205 << queue[i].size() << " to " << aligned_size;
206 iovec_[write_index].iov_len = aligned_size;
207
208 const auto code = WriteV(iovec_.data(), write_index + 1, true,
209 counted_size + aligned_size);
210 if (code == WriteCode::kOutOfSpace) {
211 return {
212 .code = code,
213 .messages_written = i,
214 };
215 }
216
217 // Now, everything before here has been written. Make an iovec out of
218 // the rest and keep going.
219 write_index = 0;
220 counted_size = 0;
221
222 iovec_[write_index].iov_base =
223 const_cast<uint8_t *>(queue[i].data() + aligned_size);
224 iovec_[write_index].iov_len = queue[i].size() - aligned_size;
Alexei Strots01395492023-03-20 13:59:56 -0700225 }
Alexei Strots01395492023-03-20 13:59:56 -0700226 }
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700227 was_aligned = false;
Alexei Strots01395492023-03-20 13:59:56 -0700228 }
229 VLOG(1) << "Writing " << iovec_[write_index].iov_len << " to "
230 << filename();
231 counted_size += iovec_[write_index].iov_len;
232 ++write_index;
233 }
234
235 // Either write the aligned data if it is all aligned, or write the rest
236 // unaligned if we wrote aligned up above.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700237 const auto code = WriteV(iovec_.data(), write_index, was_aligned, counted_size);
Alexei Strots01395492023-03-20 13:59:56 -0700238 return {
239 .code = code,
Austin Schuh3ebaf782023-04-07 16:03:28 -0700240 .messages_written = queue.size(),
Alexei Strots01395492023-03-20 13:59:56 -0700241 };
242}
243
244WriteCode FileHandler::WriteV(struct iovec *iovec_data, size_t iovec_size,
245 bool aligned, size_t counted_size) {
246 // Configure the file descriptor to match the mode we should be in. This is
247 // safe to over-call since it only does the syscall if needed.
248 if (aligned) {
249 EnableDirect();
250 } else {
251 DisableDirect();
252 }
253
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700254 CHECK_GT(iovec_size, 0u);
Alexei Strots01395492023-03-20 13:59:56 -0700255 const auto start = aos::monotonic_clock::now();
Alexei Strots01395492023-03-20 13:59:56 -0700256
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700257 if (aligned) {
258 CHECK_EQ((total_write_bytes_ % FileHandler::kSector), 0u)
259 << ": Failed after writing " << total_write_bytes_
260 << " to the file, attempting aligned write with unaligned start.";
261 for (size_t i = 0; i < iovec_size; ++i) {
262 absl::Span<const uint8_t> data(
263 reinterpret_cast<const uint8_t *>(iovec_data[i].iov_base),
264 iovec_data[i].iov_len);
265 VLOG(2) << " iov_base " << static_cast<void *>(iovec_data[i].iov_base)
266 << ", iov_len " << iovec_data[i].iov_len;
267 CHECK(IsAlignedStart(data) && IsAlignedLength(data));
268 CHECK_GT(data.size(), 0u);
269 }
270 } else {
271 size_t accumulated_write_bytes = total_write_bytes_;
272 for (size_t i = 0; i < iovec_size; ++i) {
273 absl::Span<const uint8_t> data(
274 reinterpret_cast<const uint8_t *>(iovec_data[i].iov_base),
275 iovec_data[i].iov_len);
276 VLOG(2) << " accumulated_write_bytes 0x" << std::hex
277 << accumulated_write_bytes << " (" << std::dec
278 << accumulated_write_bytes << "), iov_base "
279 << static_cast<void *>(iovec_data[i].iov_base) << ", iov_len 0x"
280 << std::hex << iovec_data[i].iov_len << " (" << std::dec
281 << iovec_data[i].iov_len << ")";
282
283 if ((accumulated_write_bytes % FileHandler::kSector) == 0) {
284 CHECK(!IsAlignedStart(data) || !IsAlignedLength(data));
285 }
286
287 accumulated_write_bytes += data.size();
288 CHECK_GT(data.size(), 0u);
289 }
290 }
291
Austin Schuh3ebaf782023-04-07 16:03:28 -0700292 if (VLOG_IS_ON(2)) {
293 size_t to_be_written = 0;
294 for (size_t i = 0; i < iovec_size; ++i) {
Austin Schuh3ebaf782023-04-07 16:03:28 -0700295 to_be_written += iovec_data[i].iov_len;
296 }
Austin Schuh3ebaf782023-04-07 16:03:28 -0700297 VLOG(2) << "Going to write " << to_be_written;
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700298 CHECK_GT(to_be_written, 0u);
Austin Schuh3ebaf782023-04-07 16:03:28 -0700299 }
300
301 const ssize_t written = writev(fd_, iovec_data, iovec_size);
302 VLOG(2) << "Wrote " << written << ", for iovec size " << iovec_size;
303
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700304 if (FLAGS_sync) {
Alexei Strots01395492023-03-20 13:59:56 -0700305 // Flush asynchronously and force the data out of the cache.
306 sync_file_range(fd_, total_write_bytes_, written, SYNC_FILE_RANGE_WRITE);
307 if (last_synced_bytes_ != 0) {
308 // Per Linus' recommendation online on how to do fast file IO, do a
309 // blocking flush of the previous write chunk, and then tell the kernel to
310 // drop the pages from the cache. This makes sure we can't get too far
311 // ahead.
312 sync_file_range(fd_, last_synced_bytes_,
313 total_write_bytes_ - last_synced_bytes_,
314 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
315 SYNC_FILE_RANGE_WAIT_AFTER);
316 posix_fadvise(fd_, last_synced_bytes_,
317 total_write_bytes_ - last_synced_bytes_,
318 POSIX_FADV_DONTNEED);
319
Alexei Strots01395492023-03-20 13:59:56 -0700320 }
Austin Schuh3c4029f2023-04-13 12:09:55 -0700321 last_synced_bytes_ = total_write_bytes_;
Alexei Strots01395492023-03-20 13:59:56 -0700322 }
323
324 const auto end = aos::monotonic_clock::now();
325 if (written == -1 && errno == ENOSPC) {
326 return WriteCode::kOutOfSpace;
327 }
328 PCHECK(written >= 0) << ": write failed, got " << written;
329 if (written < static_cast<ssize_t>(counted_size)) {
330 // Sometimes this happens instead of ENOSPC. On a real filesystem, this
331 // never seems to happen in any other case. If we ever want to log to a
332 // socket, this will happen more often. However, until we get there, we'll
333 // just assume it means we ran out of space.
334 return WriteCode::kOutOfSpace;
335 }
336
337 total_write_bytes_ += written;
338 write_stats_.UpdateStats(end - start, written, iovec_size);
339 return WriteCode::kOk;
340}
341
342WriteCode FileHandler::Close() {
343 if (!is_open()) {
344 return WriteCode::kOk;
345 }
346 bool ran_out_of_space = false;
347 if (close(fd_) == -1) {
348 if (errno == ENOSPC) {
349 ran_out_of_space = true;
350 } else {
351 PLOG(ERROR) << "Closing log file failed";
352 }
353 }
354 fd_ = -1;
355 VLOG(1) << "Closed " << filename_;
356 return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk;
357}
358
359FileBackend::FileBackend(std::string_view base_name)
360 : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {}
361
362std::unique_ptr<FileHandler> FileBackend::RequestFile(std::string_view id) {
363 const std::string filename = absl::StrCat(base_name_, separator_, id);
364 return std::make_unique<FileHandler>(filename);
365}
366
367RenamableFileBackend::RenamableFileBackend(std::string_view base_name)
368 : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {}
369
370std::unique_ptr<FileHandler> RenamableFileBackend::RequestFile(
371 std::string_view id) {
372 const std::string filename =
373 absl::StrCat(base_name_, separator_, id, temp_suffix_);
374 return std::make_unique<RenamableFileHandler>(this, filename);
375}
376
377void RenamableFileBackend::EnableTempFiles() {
378 use_temp_files_ = true;
379 temp_suffix_ = kTempExtension;
380}
381
382bool RenamableFileBackend::RenameLogBase(std::string_view new_base_name) {
383 if (new_base_name == base_name_) {
384 return true;
385 }
386 CHECK(old_base_name_.empty())
387 << "Only one change of base_name is supported. Was: " << old_base_name_;
388
389 std::string current_directory = base_name_;
390 std::string new_directory(new_base_name);
391
392 auto current_path_split = current_directory.rfind("/");
393 CHECK(current_path_split != std::string::npos)
394 << "Could not find / in the current directory path";
395 auto new_path_split = new_directory.rfind("/");
396 CHECK(new_path_split != std::string::npos)
397 << "Could not find / in the new directory path";
398
399 CHECK(new_base_name.substr(new_path_split) ==
400 current_directory.substr(current_path_split))
401 << "Rename of file base from " << current_directory << " to "
402 << new_directory << " is not supported.";
403
404 current_directory.resize(current_path_split);
405 new_directory.resize(new_path_split);
406 DIR *dir = opendir(current_directory.c_str());
407 if (dir) {
408 closedir(dir);
409 const int result = rename(current_directory.c_str(), new_directory.c_str());
410 if (result != 0) {
411 PLOG(ERROR) << "Unable to rename " << current_directory << " to "
412 << new_directory;
413 return false;
414 }
415 } else {
416 // Handle if directory was already renamed.
417 dir = opendir(new_directory.c_str());
418 if (!dir) {
419 LOG(ERROR) << "Old directory " << current_directory
420 << " missing and new directory " << new_directory
421 << " not present.";
422 return false;
423 }
424 closedir(dir);
425 }
426 old_base_name_ = base_name_;
427 base_name_ = std::string(new_base_name);
428 separator_ = base_name_.back() == '/' ? "" : "_";
429 return true;
430}
431
432WriteCode RenamableFileBackend::RenameFileAfterClose(
433 std::string_view filename) {
434 // Fast check that we can skip rename.
435 if (!use_temp_files_ && old_base_name_.empty()) {
436 return WriteCode::kOk;
437 }
438
439 std::string current_filename(filename);
440
441 // When changing the base name, we rename the log folder while there active
442 // buffer writers. Therefore, the name of that active buffer may still refer
443 // to the old file location rather than the new one.
444 if (!old_base_name_.empty()) {
445 auto offset = current_filename.find(old_base_name_);
446 if (offset != std::string::npos) {
447 current_filename.replace(offset, old_base_name_.length(), base_name_);
448 }
449 }
450
451 std::string final_filename = current_filename;
452 if (use_temp_files_) {
453 CHECK(current_filename.size() > temp_suffix_.size());
454 final_filename = current_filename.substr(
455 0, current_filename.size() - temp_suffix_.size());
456 }
457
458 int result = rename(current_filename.c_str(), final_filename.c_str());
459
460 bool ran_out_of_space = false;
461 if (result != 0) {
462 if (errno == ENOSPC) {
463 ran_out_of_space = true;
464 } else {
465 PLOG(FATAL) << "Renaming " << current_filename << " to " << final_filename
466 << " failed";
467 }
468 } else {
469 VLOG(1) << "Renamed " << current_filename << " -> " << final_filename;
470 }
471 return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk;
472}
473
474WriteCode RenamableFileBackend::RenamableFileHandler::Close() {
475 if (!is_open()) {
476 return WriteCode::kOk;
477 }
478 if (FileHandler::Close() == WriteCode::kOutOfSpace) {
479 return WriteCode::kOutOfSpace;
480 }
481 if (owner_->RenameFileAfterClose(filename()) == WriteCode::kOutOfSpace) {
482 return WriteCode::kOutOfSpace;
483 }
484 return WriteCode::kOk;
485}
Austin Schuh3ebaf782023-04-07 16:03:28 -0700486} // namespace aos::logger