blob: 9b3f0908d2eae3f0dcf19d061e465dfacd7bcd4d [file] [log] [blame]
Alexei Strots01395492023-03-20 13:59:56 -07001#include "aos/events/logging/log_backend.h"
2
3#include <dirent.h>
4
5#include <filesystem>
6
7#include "absl/strings/str_cat.h"
8#include "aos/util/file.h"
9#include "glog/logging.h"
10
11DEFINE_bool(direct, false,
12 "If true, write using O_DIRECT and write 512 byte aligned blocks "
13 "whenever possible.");
Austin Schuh3ebaf782023-04-07 16:03:28 -070014DEFINE_bool(
15 sync, false,
16 "If true, sync data to disk as we go so we don't get too far ahead. Also "
17 "fadvise that we are done with the memory once it hits disk.");
Alexei Strots01395492023-03-20 13:59:56 -070018
Alexei Strotsa0b99d72023-04-11 15:12:42 -070019DEFINE_uint32(queue_reserve, 32, "Pre-reserved size of write queue.");
20
Alexei Strots01395492023-03-20 13:59:56 -070021namespace aos::logger {
22namespace {
23constexpr const char *kTempExtension = ".tmp";
Alexei Strotsa0b99d72023-04-11 15:12:42 -070024
25// Assuming that kSector is power of 2, it aligns address to the left size.
26inline size_t AlignToLeft(size_t value) {
27 return value & (~(FileHandler::kSector - 1));
28}
29
30inline bool IsAligned(size_t value) {
31 return value % FileHandler::kSector == 0;
32}
33
34inline bool IsAlignedStart(const absl::Span<const uint8_t> span) {
35 return (reinterpret_cast<size_t>(span.data()) % FileHandler::kSector) == 0;
36}
37
38inline bool IsAlignedLength(const absl::Span<const uint8_t> span) {
39 return (span.size() % FileHandler::kSector) == 0;
40}
41
42} // namespace
43
44logger::QueueAligner::QueueAligner() {
45 aligned_queue_.reserve(FLAGS_queue_reserve);
46}
47
48void logger::QueueAligner::FillAlignedQueue(
49 const absl::Span<const absl::Span<const uint8_t>> &queue) {
50 aligned_queue_.clear();
51
52 for (const auto &span : queue) {
53 // Generally, every span might have 3 optional parts (i.e. 2^3 cases):
54 // 1. unaligned prefix - from start till first aligned block.
55 // 2. aligned main - block with aligned start and size
56 // 3. unaligned suffix - block with aligned start, and size less than one
57 // sector. If size of the span is less than 1 sector, let's call it prefix.
58
59 auto *data = span.data();
60 size_t size = span.size();
61 const auto start = reinterpret_cast<size_t>(data);
62 VLOG(2) << "Consider span starting at " << std::hex << start
63 << " with size " << size;
64
65 CHECK_GT(size, 0u) << ": Nobody should be sending empty messages.";
66
67 const auto next_aligned =
68 IsAligned(start) ? start : AlignToLeft(start) + FileHandler::kSector;
69 const auto prefix_size = next_aligned - start;
70 VLOG(2) << "Calculated prefix size " << std::hex << prefix_size;
71
72 if (prefix_size >= size) {
73 // size of prefix >= size of span - alignment is not possible, accept the
74 // whole span
75 VLOG(2) << "Only prefix found";
76 CHECK_GT(size, 0u);
77 aligned_queue_.emplace_back(data, size, false);
78 continue;
79 }
80 CHECK_LT(prefix_size, FileHandler::kSector)
81 << ": Wrong calculation of 'next' aligned position";
82 if (prefix_size > 0) {
83 // Cut the prefix and move to the main part.
84 VLOG(2) << "Cutting prefix at " << std::hex << start << " of size "
85 << prefix_size;
86 aligned_queue_.emplace_back(data, prefix_size, false);
87 data += prefix_size;
88 size -= prefix_size;
89 CHECK(data <= span.data() + span.size()) << " :Boundaries after prefix";
90 }
91
92 if (IsAligned(size)) {
93 // the rest is aligned.
94 VLOG(2) << "Returning aligned main part";
95 CHECK_GT(size, 0u);
96 aligned_queue_.emplace_back(data, size, true);
97 continue;
98 }
99
100 const auto aligned_size = AlignToLeft(size);
101 CHECK(aligned_size < size) << ": Wrong calculation of 'main' size";
102 if (aligned_size > 0) {
103 VLOG(2) << "Cutting main part starting " << std::hex
104 << reinterpret_cast<size_t>(data) << " of size " << aligned_size;
105 aligned_queue_.emplace_back(data, aligned_size, true);
106
107 data += aligned_size;
108 size -= aligned_size;
109 CHECK(data <= span.data() + span.size()) << " :Boundaries after main";
110 }
111
112 VLOG(2) << "Cutting suffix part starting " << std::hex
113 << reinterpret_cast<size_t>(data) << " of size " << size;
114 CHECK_GT(size, 0u);
115 aligned_queue_.emplace_back(data, size, false);
116 }
Alexei Strots01395492023-03-20 13:59:56 -0700117}
118
119FileHandler::FileHandler(std::string filename)
120 : filename_(std::move(filename)), supports_odirect_(FLAGS_direct) {}
121
122FileHandler::~FileHandler() { Close(); }
123
124WriteCode FileHandler::OpenForWrite() {
125 iovec_.reserve(10);
126 if (!aos::util::MkdirPIfSpace(filename_, 0777)) {
127 return WriteCode::kOutOfSpace;
128 } else {
129 fd_ = open(filename_.c_str(), O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
130 if (fd_ == -1 && errno == ENOSPC) {
131 return WriteCode::kOutOfSpace;
132 } else {
133 PCHECK(fd_ != -1) << ": Failed to open " << filename_ << " for writing";
134 VLOG(1) << "Opened " << filename_ << " for writing";
135 }
136
137 flags_ = fcntl(fd_, F_GETFL, 0);
138 PCHECK(flags_ >= 0) << ": Failed to get flags for " << this->filename();
139
140 EnableDirect();
141
142 CHECK(std::filesystem::exists(filename_));
143
144 return WriteCode::kOk;
145 }
146}
147
148void FileHandler::EnableDirect() {
149 if (supports_odirect_ && !ODirectEnabled()) {
150 const int new_flags = flags_ | O_DIRECT;
151 // Track if we failed to set O_DIRECT. Note: Austin hasn't seen this call
152 // fail. The write call tends to fail instead.
153 if (fcntl(fd_, F_SETFL, new_flags) == -1) {
154 PLOG(WARNING) << "Failed to set O_DIRECT on " << filename();
155 supports_odirect_ = false;
156 } else {
157 VLOG(1) << "Enabled O_DIRECT on " << filename();
158 flags_ = new_flags;
159 }
160 }
161}
162
163void FileHandler::DisableDirect() {
164 if (supports_odirect_ && ODirectEnabled()) {
165 flags_ = flags_ & (~O_DIRECT);
166 PCHECK(fcntl(fd_, F_SETFL, flags_) != -1) << ": Failed to disable O_DIRECT";
167 VLOG(1) << "Disabled O_DIRECT on " << filename();
168 }
169}
170
171WriteResult FileHandler::Write(
172 const absl::Span<const absl::Span<const uint8_t>> &queue) {
173 iovec_.clear();
Austin Schuh3ebaf782023-04-07 16:03:28 -0700174 CHECK_LE(queue.size(), static_cast<size_t>(IOV_MAX));
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700175
176 queue_aligner_.FillAlignedQueue(queue);
177 CHECK_LE(queue_aligner_.aligned_queue().size(), static_cast<size_t>(IOV_MAX));
Alexei Strots01395492023-03-20 13:59:56 -0700178
179 // Ok, we now need to figure out if we were aligned, and if we were, how much
180 // of the data we are being asked to write is aligned.
181 //
Austin Schuh3ebaf782023-04-07 16:03:28 -0700182 // When writing with O_DIRECT, the kernel only will accept writes where the
183 // offset into the file is a multiple of kSector, the data is aligned to
184 // kSector in memory, and the length being written is a multiple of kSector.
185 // Some of the callers use an aligned ResizeableBuffer to generate 512 byte
186 // aligned buffers for this code to find and use.
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700187 bool was_aligned = IsAligned(total_write_bytes_);
188 VLOG(1) << "Started " << (was_aligned ? "aligned" : "unaligned")
189 << " at offset " << total_write_bytes_ << " on " << filename();
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700190
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700191 // Walk through aligned queue and batch writes basel on aligned flag
192 for (const auto &item : queue_aligner_.aligned_queue()) {
193 if (was_aligned != item.aligned) {
194 // Switching aligned context. Let's flush current batch.
195 if (!iovec_.empty()) {
196 // Flush current queue if we need.
197 const auto code = WriteV(iovec_, was_aligned);
Austin Schuh3ebaf782023-04-07 16:03:28 -0700198 if (code == WriteCode::kOutOfSpace) {
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700199 // We cannot say anything about what number of messages was written
200 // for sure.
Austin Schuh3ebaf782023-04-07 16:03:28 -0700201 return {
202 .code = code,
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700203 .messages_written = queue.size(),
Austin Schuh3ebaf782023-04-07 16:03:28 -0700204 };
205 }
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700206 iovec_.clear();
Austin Schuh3ebaf782023-04-07 16:03:28 -0700207 }
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700208 // Write queue is flushed. WriteV updates the total_write_bytes_.
209 was_aligned = IsAligned(total_write_bytes_) && item.aligned;
Alexei Strots01395492023-03-20 13:59:56 -0700210 }
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700211 iovec_.push_back(
212 {.iov_base = const_cast<uint8_t *>(item.data), .iov_len = item.size});
Alexei Strots01395492023-03-20 13:59:56 -0700213 }
214
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700215 WriteCode result_code = WriteCode::kOk;
216 if (!iovec_.empty()) {
217 // Flush current queue if we need.
218 result_code = WriteV(iovec_, was_aligned);
219 }
Alexei Strots01395492023-03-20 13:59:56 -0700220 return {
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700221 .code = result_code,
Austin Schuh3ebaf782023-04-07 16:03:28 -0700222 .messages_written = queue.size(),
Alexei Strots01395492023-03-20 13:59:56 -0700223 };
224}
225
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700226WriteCode FileHandler::WriteV(const std::vector<struct iovec> &iovec,
227 bool aligned) {
Alexei Strots01395492023-03-20 13:59:56 -0700228 // Configure the file descriptor to match the mode we should be in. This is
229 // safe to over-call since it only does the syscall if needed.
230 if (aligned) {
231 EnableDirect();
232 } else {
233 DisableDirect();
234 }
235
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700236 VLOG(2) << "Flushing queue of " << iovec.size() << " elements, "
237 << (aligned ? "aligned" : "unaligned");
238
239 CHECK_GT(iovec.size(), 0u);
Alexei Strots01395492023-03-20 13:59:56 -0700240 const auto start = aos::monotonic_clock::now();
Alexei Strots01395492023-03-20 13:59:56 -0700241
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700242 // Validation of alignment assumptions.
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700243 if (aligned) {
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700244 CHECK(IsAligned(total_write_bytes_))
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700245 << ": Failed after writing " << total_write_bytes_
246 << " to the file, attempting aligned write with unaligned start.";
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700247
248 for (const auto &iovec_item : iovec) {
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700249 absl::Span<const uint8_t> data(
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700250 reinterpret_cast<const uint8_t *>(iovec_item.iov_base),
251 iovec_item.iov_len);
252 VLOG(2) << " iov_base " << static_cast<void *>(iovec_item.iov_base)
253 << ", iov_len " << iovec_item.iov_len;
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700254 CHECK(IsAlignedStart(data) && IsAlignedLength(data));
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700255 }
256 }
257
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700258 // Calculation of expected written size.
259 size_t counted_size = 0;
260 for (const auto &iovec_item : iovec) {
261 CHECK_GT(iovec_item.iov_len, 0u);
262 counted_size += iovec_item.iov_len;
Austin Schuh3ebaf782023-04-07 16:03:28 -0700263 }
264
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700265 VLOG(2) << "Going to write " << counted_size;
266 CHECK_GT(counted_size, 0u);
267
268 const ssize_t written = writev(fd_, iovec.data(), iovec.size());
269 VLOG(2) << "Wrote " << written << ", for iovec size " << iovec.size();
270
271 const auto end = aos::monotonic_clock::now();
272 if (written == -1 && errno == ENOSPC) {
273 return WriteCode::kOutOfSpace;
274 }
275 PCHECK(written >= 0) << ": write failed, got " << written;
276 if (written < static_cast<ssize_t>(counted_size)) {
277 // Sometimes this happens instead of ENOSPC. On a real filesystem, this
278 // never seems to happen in any other case. If we ever want to log to a
279 // socket, this will happen more often. However, until we get there, we'll
280 // just assume it means we ran out of space.
281 return WriteCode::kOutOfSpace;
282 }
Austin Schuh3ebaf782023-04-07 16:03:28 -0700283
Austin Schuh9e8df9e2023-05-03 08:28:29 -0700284 if (FLAGS_sync) {
Alexei Strots01395492023-03-20 13:59:56 -0700285 // Flush asynchronously and force the data out of the cache.
286 sync_file_range(fd_, total_write_bytes_, written, SYNC_FILE_RANGE_WRITE);
287 if (last_synced_bytes_ != 0) {
288 // Per Linus' recommendation online on how to do fast file IO, do a
289 // blocking flush of the previous write chunk, and then tell the kernel to
290 // drop the pages from the cache. This makes sure we can't get too far
291 // ahead.
292 sync_file_range(fd_, last_synced_bytes_,
293 total_write_bytes_ - last_synced_bytes_,
294 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
295 SYNC_FILE_RANGE_WAIT_AFTER);
296 posix_fadvise(fd_, last_synced_bytes_,
297 total_write_bytes_ - last_synced_bytes_,
298 POSIX_FADV_DONTNEED);
299
Alexei Strots01395492023-03-20 13:59:56 -0700300 }
Austin Schuh3c4029f2023-04-13 12:09:55 -0700301 last_synced_bytes_ = total_write_bytes_;
Alexei Strots01395492023-03-20 13:59:56 -0700302 }
303
Alexei Strots01395492023-03-20 13:59:56 -0700304 total_write_bytes_ += written;
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700305 if (aligned) {
306 written_aligned_ += written;
307 }
308 write_stats_.UpdateStats(end - start, written, iovec.size());
Alexei Strots01395492023-03-20 13:59:56 -0700309 return WriteCode::kOk;
310}
311
312WriteCode FileHandler::Close() {
313 if (!is_open()) {
314 return WriteCode::kOk;
315 }
316 bool ran_out_of_space = false;
317 if (close(fd_) == -1) {
318 if (errno == ENOSPC) {
319 ran_out_of_space = true;
320 } else {
321 PLOG(ERROR) << "Closing log file failed";
322 }
323 }
324 fd_ = -1;
325 VLOG(1) << "Closed " << filename_;
326 return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk;
327}
328
329FileBackend::FileBackend(std::string_view base_name)
330 : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {}
331
332std::unique_ptr<FileHandler> FileBackend::RequestFile(std::string_view id) {
333 const std::string filename = absl::StrCat(base_name_, separator_, id);
334 return std::make_unique<FileHandler>(filename);
335}
336
337RenamableFileBackend::RenamableFileBackend(std::string_view base_name)
338 : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {}
339
340std::unique_ptr<FileHandler> RenamableFileBackend::RequestFile(
341 std::string_view id) {
342 const std::string filename =
343 absl::StrCat(base_name_, separator_, id, temp_suffix_);
344 return std::make_unique<RenamableFileHandler>(this, filename);
345}
346
347void RenamableFileBackend::EnableTempFiles() {
348 use_temp_files_ = true;
349 temp_suffix_ = kTempExtension;
350}
351
352bool RenamableFileBackend::RenameLogBase(std::string_view new_base_name) {
353 if (new_base_name == base_name_) {
354 return true;
355 }
356 CHECK(old_base_name_.empty())
357 << "Only one change of base_name is supported. Was: " << old_base_name_;
358
359 std::string current_directory = base_name_;
360 std::string new_directory(new_base_name);
361
362 auto current_path_split = current_directory.rfind("/");
363 CHECK(current_path_split != std::string::npos)
364 << "Could not find / in the current directory path";
365 auto new_path_split = new_directory.rfind("/");
366 CHECK(new_path_split != std::string::npos)
367 << "Could not find / in the new directory path";
368
369 CHECK(new_base_name.substr(new_path_split) ==
370 current_directory.substr(current_path_split))
371 << "Rename of file base from " << current_directory << " to "
372 << new_directory << " is not supported.";
373
374 current_directory.resize(current_path_split);
375 new_directory.resize(new_path_split);
376 DIR *dir = opendir(current_directory.c_str());
377 if (dir) {
378 closedir(dir);
379 const int result = rename(current_directory.c_str(), new_directory.c_str());
380 if (result != 0) {
381 PLOG(ERROR) << "Unable to rename " << current_directory << " to "
382 << new_directory;
383 return false;
384 }
385 } else {
386 // Handle if directory was already renamed.
387 dir = opendir(new_directory.c_str());
388 if (!dir) {
389 LOG(ERROR) << "Old directory " << current_directory
390 << " missing and new directory " << new_directory
391 << " not present.";
392 return false;
393 }
394 closedir(dir);
395 }
396 old_base_name_ = base_name_;
397 base_name_ = std::string(new_base_name);
398 separator_ = base_name_.back() == '/' ? "" : "_";
399 return true;
400}
401
402WriteCode RenamableFileBackend::RenameFileAfterClose(
403 std::string_view filename) {
404 // Fast check that we can skip rename.
405 if (!use_temp_files_ && old_base_name_.empty()) {
406 return WriteCode::kOk;
407 }
408
409 std::string current_filename(filename);
410
411 // When changing the base name, we rename the log folder while there active
412 // buffer writers. Therefore, the name of that active buffer may still refer
413 // to the old file location rather than the new one.
414 if (!old_base_name_.empty()) {
415 auto offset = current_filename.find(old_base_name_);
416 if (offset != std::string::npos) {
417 current_filename.replace(offset, old_base_name_.length(), base_name_);
418 }
419 }
420
421 std::string final_filename = current_filename;
422 if (use_temp_files_) {
423 CHECK(current_filename.size() > temp_suffix_.size());
424 final_filename = current_filename.substr(
425 0, current_filename.size() - temp_suffix_.size());
426 }
427
428 int result = rename(current_filename.c_str(), final_filename.c_str());
429
430 bool ran_out_of_space = false;
431 if (result != 0) {
432 if (errno == ENOSPC) {
433 ran_out_of_space = true;
434 } else {
435 PLOG(FATAL) << "Renaming " << current_filename << " to " << final_filename
436 << " failed";
437 }
438 } else {
439 VLOG(1) << "Renamed " << current_filename << " -> " << final_filename;
440 }
441 return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk;
442}
443
444WriteCode RenamableFileBackend::RenamableFileHandler::Close() {
445 if (!is_open()) {
446 return WriteCode::kOk;
447 }
448 if (FileHandler::Close() == WriteCode::kOutOfSpace) {
449 return WriteCode::kOutOfSpace;
450 }
451 if (owner_->RenameFileAfterClose(filename()) == WriteCode::kOutOfSpace) {
452 return WriteCode::kOutOfSpace;
453 }
454 return WriteCode::kOk;
455}
Alexei Strotsa0b99d72023-04-11 15:12:42 -0700456
Austin Schuh3ebaf782023-04-07 16:03:28 -0700457} // namespace aos::logger