Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 1 | #include "aos/events/logging/log_backend.h" |
| 2 | |
| 3 | #include <dirent.h> |
| 4 | |
| 5 | #include <filesystem> |
| 6 | |
| 7 | #include "absl/strings/str_cat.h" |
| 8 | #include "aos/util/file.h" |
| 9 | #include "glog/logging.h" |
| 10 | |
| 11 | DEFINE_bool(direct, false, |
| 12 | "If true, write using O_DIRECT and write 512 byte aligned blocks " |
| 13 | "whenever possible."); |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 14 | DEFINE_bool( |
| 15 | sync, false, |
| 16 | "If true, sync data to disk as we go so we don't get too far ahead. Also " |
| 17 | "fadvise that we are done with the memory once it hits disk."); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 18 | |
| 19 | namespace aos::logger { |
| 20 | namespace { |
| 21 | constexpr const char *kTempExtension = ".tmp"; |
| 22 | } |
| 23 | |
| 24 | FileHandler::FileHandler(std::string filename) |
| 25 | : filename_(std::move(filename)), supports_odirect_(FLAGS_direct) {} |
| 26 | |
| 27 | FileHandler::~FileHandler() { Close(); } |
| 28 | |
| 29 | WriteCode FileHandler::OpenForWrite() { |
| 30 | iovec_.reserve(10); |
| 31 | if (!aos::util::MkdirPIfSpace(filename_, 0777)) { |
| 32 | return WriteCode::kOutOfSpace; |
| 33 | } else { |
| 34 | fd_ = open(filename_.c_str(), O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774); |
| 35 | if (fd_ == -1 && errno == ENOSPC) { |
| 36 | return WriteCode::kOutOfSpace; |
| 37 | } else { |
| 38 | PCHECK(fd_ != -1) << ": Failed to open " << filename_ << " for writing"; |
| 39 | VLOG(1) << "Opened " << filename_ << " for writing"; |
| 40 | } |
| 41 | |
| 42 | flags_ = fcntl(fd_, F_GETFL, 0); |
| 43 | PCHECK(flags_ >= 0) << ": Failed to get flags for " << this->filename(); |
| 44 | |
| 45 | EnableDirect(); |
| 46 | |
| 47 | CHECK(std::filesystem::exists(filename_)); |
| 48 | |
| 49 | return WriteCode::kOk; |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | void FileHandler::EnableDirect() { |
| 54 | if (supports_odirect_ && !ODirectEnabled()) { |
| 55 | const int new_flags = flags_ | O_DIRECT; |
| 56 | // Track if we failed to set O_DIRECT. Note: Austin hasn't seen this call |
| 57 | // fail. The write call tends to fail instead. |
| 58 | if (fcntl(fd_, F_SETFL, new_flags) == -1) { |
| 59 | PLOG(WARNING) << "Failed to set O_DIRECT on " << filename(); |
| 60 | supports_odirect_ = false; |
| 61 | } else { |
| 62 | VLOG(1) << "Enabled O_DIRECT on " << filename(); |
| 63 | flags_ = new_flags; |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | void FileHandler::DisableDirect() { |
| 69 | if (supports_odirect_ && ODirectEnabled()) { |
| 70 | flags_ = flags_ & (~O_DIRECT); |
| 71 | PCHECK(fcntl(fd_, F_SETFL, flags_) != -1) << ": Failed to disable O_DIRECT"; |
| 72 | VLOG(1) << "Disabled O_DIRECT on " << filename(); |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | WriteResult FileHandler::Write( |
| 77 | const absl::Span<const absl::Span<const uint8_t>> &queue) { |
| 78 | iovec_.clear(); |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 79 | CHECK_LE(queue.size(), static_cast<size_t>(IOV_MAX)); |
| 80 | iovec_.resize(queue.size()); |
| 81 | // Size of the data currently in iovec_. |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 82 | size_t counted_size = 0; |
| 83 | |
| 84 | // Ok, we now need to figure out if we were aligned, and if we were, how much |
| 85 | // of the data we are being asked to write is aligned. |
| 86 | // |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 87 | // When writing with O_DIRECT, the kernel only will accept writes where the |
| 88 | // offset into the file is a multiple of kSector, the data is aligned to |
| 89 | // kSector in memory, and the length being written is a multiple of kSector. |
| 90 | // Some of the callers use an aligned ResizeableBuffer to generate 512 byte |
| 91 | // aligned buffers for this code to find and use. |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 92 | bool aligned = (total_write_bytes_ % kSector) == 0; |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 93 | |
| 94 | // Index we are filling in next. Keeps resetting back to 0 as we write |
| 95 | // intermediates. |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 96 | size_t write_index = 0; |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 97 | for (size_t i = 0; i < queue.size(); ++i) { |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 98 | iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data()); |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 99 | iovec_[write_index].iov_len = queue[i].size(); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 100 | |
| 101 | // Make sure the address is aligned, or give up. This should be uncommon, |
| 102 | // but is always possible. |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 103 | if ((reinterpret_cast<size_t>(iovec_[write_index].iov_base) % kSector) != |
| 104 | 0) { |
| 105 | // Flush if we were aligned and have data. |
| 106 | if (aligned && write_index != 0) { |
| 107 | VLOG(1) << "Was aligned, now is not, writing previous data"; |
| 108 | const auto code = |
| 109 | WriteV(iovec_.data(), write_index, true, counted_size); |
| 110 | if (code == WriteCode::kOutOfSpace) { |
| 111 | return { |
| 112 | .code = code, |
| 113 | .messages_written = i, |
| 114 | }; |
| 115 | } |
| 116 | |
| 117 | // Now, everything before here has been written. Make an iovec out of |
| 118 | // the rest and keep going. |
| 119 | write_index = 0; |
| 120 | counted_size = 0; |
| 121 | |
| 122 | iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data()); |
| 123 | iovec_[write_index].iov_len = queue[i].size(); |
| 124 | } |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 125 | aligned = false; |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 126 | } else { |
| 127 | // We are now starting aligned again, and have data worth writing! Flush |
| 128 | // what was there before. |
| 129 | if (!aligned && iovec_[write_index].iov_len >= kSector && |
| 130 | write_index != 0) { |
| 131 | VLOG(1) << "Was not aligned, now is, writing previous data"; |
| 132 | |
| 133 | const auto code = |
| 134 | WriteV(iovec_.data(), write_index, false, counted_size); |
| 135 | if (code == WriteCode::kOutOfSpace) { |
| 136 | return { |
| 137 | .code = code, |
| 138 | .messages_written = i, |
| 139 | }; |
| 140 | } |
| 141 | |
| 142 | // Now, everything before here has been written. Make an iovec out of |
| 143 | // the rest and keep going. |
| 144 | write_index = 0; |
| 145 | counted_size = 0; |
| 146 | |
| 147 | iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data()); |
| 148 | iovec_[write_index].iov_len = queue[i].size(); |
| 149 | aligned = true; |
| 150 | } |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 151 | } |
| 152 | |
| 153 | // Now, see if the length is a multiple of kSector. The goal is to figure |
| 154 | // out if/how much memory we can write out with O_DIRECT so that only the |
| 155 | // last little bit is done with non-direct IO to keep it fast. |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 156 | if ((iovec_[write_index].iov_len % kSector) != 0) { |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 157 | VLOG(1) << "Unaligned length " << iovec_[write_index].iov_len << " on " |
| 158 | << filename(); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 159 | // If we've got over a sector of data to write, write it out with O_DIRECT |
| 160 | // and then continue writing the rest unaligned. |
| 161 | if (aligned && iovec_[write_index].iov_len > kSector) { |
| 162 | const size_t aligned_size = |
| 163 | iovec_[write_index].iov_len & (~(kSector - 1)); |
| 164 | VLOG(1) << "Was aligned, writing last chunk rounded from " |
| 165 | << queue[i].size() << " to " << aligned_size; |
| 166 | iovec_[write_index].iov_len = aligned_size; |
| 167 | |
| 168 | const auto code = |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 169 | WriteV(iovec_.data(), write_index + 1, true, counted_size + aligned_size); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 170 | if (code == WriteCode::kOutOfSpace) { |
| 171 | return { |
| 172 | .code = code, |
| 173 | .messages_written = i, |
| 174 | }; |
| 175 | } |
| 176 | |
| 177 | // Now, everything before here has been written. Make an iovec out of |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 178 | // the rest and keep going. |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 179 | write_index = 0; |
| 180 | counted_size = 0; |
| 181 | |
| 182 | iovec_[write_index].iov_base = |
| 183 | const_cast<uint8_t *>(queue[i].data() + aligned_size); |
| 184 | iovec_[write_index].iov_len = queue[i].size() - aligned_size; |
| 185 | } |
| 186 | aligned = false; |
| 187 | } |
| 188 | VLOG(1) << "Writing " << iovec_[write_index].iov_len << " to " |
| 189 | << filename(); |
| 190 | counted_size += iovec_[write_index].iov_len; |
| 191 | ++write_index; |
| 192 | } |
| 193 | |
| 194 | // Either write the aligned data if it is all aligned, or write the rest |
| 195 | // unaligned if we wrote aligned up above. |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 196 | const auto code = WriteV(iovec_.data(), write_index, aligned, counted_size); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 197 | return { |
| 198 | .code = code, |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 199 | .messages_written = queue.size(), |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 200 | }; |
| 201 | } |
| 202 | |
| 203 | WriteCode FileHandler::WriteV(struct iovec *iovec_data, size_t iovec_size, |
| 204 | bool aligned, size_t counted_size) { |
| 205 | // Configure the file descriptor to match the mode we should be in. This is |
| 206 | // safe to over-call since it only does the syscall if needed. |
| 207 | if (aligned) { |
| 208 | EnableDirect(); |
| 209 | } else { |
| 210 | DisableDirect(); |
| 211 | } |
| 212 | |
| 213 | const auto start = aos::monotonic_clock::now(); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 214 | |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 215 | if (VLOG_IS_ON(2)) { |
| 216 | size_t to_be_written = 0; |
| 217 | for (size_t i = 0; i < iovec_size; ++i) { |
| 218 | VLOG(2) << " iov_base " << static_cast<void *>(iovec_data[i].iov_base) |
| 219 | << ", iov_len " << iovec_data[i].iov_len; |
| 220 | to_be_written += iovec_data[i].iov_len; |
| 221 | } |
| 222 | CHECK_GT(to_be_written, 0u); |
| 223 | VLOG(2) << "Going to write " << to_be_written; |
| 224 | } |
| 225 | |
| 226 | const ssize_t written = writev(fd_, iovec_data, iovec_size); |
| 227 | VLOG(2) << "Wrote " << written << ", for iovec size " << iovec_size; |
| 228 | |
| 229 | if (FLAGS_sync && written > 0) { |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 230 | // Flush asynchronously and force the data out of the cache. |
| 231 | sync_file_range(fd_, total_write_bytes_, written, SYNC_FILE_RANGE_WRITE); |
| 232 | if (last_synced_bytes_ != 0) { |
| 233 | // Per Linus' recommendation online on how to do fast file IO, do a |
| 234 | // blocking flush of the previous write chunk, and then tell the kernel to |
| 235 | // drop the pages from the cache. This makes sure we can't get too far |
| 236 | // ahead. |
| 237 | sync_file_range(fd_, last_synced_bytes_, |
| 238 | total_write_bytes_ - last_synced_bytes_, |
| 239 | SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | |
| 240 | SYNC_FILE_RANGE_WAIT_AFTER); |
| 241 | posix_fadvise(fd_, last_synced_bytes_, |
| 242 | total_write_bytes_ - last_synced_bytes_, |
| 243 | POSIX_FADV_DONTNEED); |
| 244 | |
| 245 | last_synced_bytes_ = total_write_bytes_; |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | const auto end = aos::monotonic_clock::now(); |
| 250 | if (written == -1 && errno == ENOSPC) { |
| 251 | return WriteCode::kOutOfSpace; |
| 252 | } |
| 253 | PCHECK(written >= 0) << ": write failed, got " << written; |
| 254 | if (written < static_cast<ssize_t>(counted_size)) { |
| 255 | // Sometimes this happens instead of ENOSPC. On a real filesystem, this |
| 256 | // never seems to happen in any other case. If we ever want to log to a |
| 257 | // socket, this will happen more often. However, until we get there, we'll |
| 258 | // just assume it means we ran out of space. |
| 259 | return WriteCode::kOutOfSpace; |
| 260 | } |
| 261 | |
| 262 | total_write_bytes_ += written; |
| 263 | write_stats_.UpdateStats(end - start, written, iovec_size); |
| 264 | return WriteCode::kOk; |
| 265 | } |
| 266 | |
| 267 | WriteCode FileHandler::Close() { |
| 268 | if (!is_open()) { |
| 269 | return WriteCode::kOk; |
| 270 | } |
| 271 | bool ran_out_of_space = false; |
| 272 | if (close(fd_) == -1) { |
| 273 | if (errno == ENOSPC) { |
| 274 | ran_out_of_space = true; |
| 275 | } else { |
| 276 | PLOG(ERROR) << "Closing log file failed"; |
| 277 | } |
| 278 | } |
| 279 | fd_ = -1; |
| 280 | VLOG(1) << "Closed " << filename_; |
| 281 | return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk; |
| 282 | } |
| 283 | |
| 284 | FileBackend::FileBackend(std::string_view base_name) |
| 285 | : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {} |
| 286 | |
| 287 | std::unique_ptr<FileHandler> FileBackend::RequestFile(std::string_view id) { |
| 288 | const std::string filename = absl::StrCat(base_name_, separator_, id); |
| 289 | return std::make_unique<FileHandler>(filename); |
| 290 | } |
| 291 | |
| 292 | RenamableFileBackend::RenamableFileBackend(std::string_view base_name) |
| 293 | : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {} |
| 294 | |
| 295 | std::unique_ptr<FileHandler> RenamableFileBackend::RequestFile( |
| 296 | std::string_view id) { |
| 297 | const std::string filename = |
| 298 | absl::StrCat(base_name_, separator_, id, temp_suffix_); |
| 299 | return std::make_unique<RenamableFileHandler>(this, filename); |
| 300 | } |
| 301 | |
| 302 | void RenamableFileBackend::EnableTempFiles() { |
| 303 | use_temp_files_ = true; |
| 304 | temp_suffix_ = kTempExtension; |
| 305 | } |
| 306 | |
| 307 | bool RenamableFileBackend::RenameLogBase(std::string_view new_base_name) { |
| 308 | if (new_base_name == base_name_) { |
| 309 | return true; |
| 310 | } |
| 311 | CHECK(old_base_name_.empty()) |
| 312 | << "Only one change of base_name is supported. Was: " << old_base_name_; |
| 313 | |
| 314 | std::string current_directory = base_name_; |
| 315 | std::string new_directory(new_base_name); |
| 316 | |
| 317 | auto current_path_split = current_directory.rfind("/"); |
| 318 | CHECK(current_path_split != std::string::npos) |
| 319 | << "Could not find / in the current directory path"; |
| 320 | auto new_path_split = new_directory.rfind("/"); |
| 321 | CHECK(new_path_split != std::string::npos) |
| 322 | << "Could not find / in the new directory path"; |
| 323 | |
| 324 | CHECK(new_base_name.substr(new_path_split) == |
| 325 | current_directory.substr(current_path_split)) |
| 326 | << "Rename of file base from " << current_directory << " to " |
| 327 | << new_directory << " is not supported."; |
| 328 | |
| 329 | current_directory.resize(current_path_split); |
| 330 | new_directory.resize(new_path_split); |
| 331 | DIR *dir = opendir(current_directory.c_str()); |
| 332 | if (dir) { |
| 333 | closedir(dir); |
| 334 | const int result = rename(current_directory.c_str(), new_directory.c_str()); |
| 335 | if (result != 0) { |
| 336 | PLOG(ERROR) << "Unable to rename " << current_directory << " to " |
| 337 | << new_directory; |
| 338 | return false; |
| 339 | } |
| 340 | } else { |
| 341 | // Handle if directory was already renamed. |
| 342 | dir = opendir(new_directory.c_str()); |
| 343 | if (!dir) { |
| 344 | LOG(ERROR) << "Old directory " << current_directory |
| 345 | << " missing and new directory " << new_directory |
| 346 | << " not present."; |
| 347 | return false; |
| 348 | } |
| 349 | closedir(dir); |
| 350 | } |
| 351 | old_base_name_ = base_name_; |
| 352 | base_name_ = std::string(new_base_name); |
| 353 | separator_ = base_name_.back() == '/' ? "" : "_"; |
| 354 | return true; |
| 355 | } |
| 356 | |
| 357 | WriteCode RenamableFileBackend::RenameFileAfterClose( |
| 358 | std::string_view filename) { |
| 359 | // Fast check that we can skip rename. |
| 360 | if (!use_temp_files_ && old_base_name_.empty()) { |
| 361 | return WriteCode::kOk; |
| 362 | } |
| 363 | |
| 364 | std::string current_filename(filename); |
| 365 | |
| 366 | // When changing the base name, we rename the log folder while there active |
| 367 | // buffer writers. Therefore, the name of that active buffer may still refer |
| 368 | // to the old file location rather than the new one. |
| 369 | if (!old_base_name_.empty()) { |
| 370 | auto offset = current_filename.find(old_base_name_); |
| 371 | if (offset != std::string::npos) { |
| 372 | current_filename.replace(offset, old_base_name_.length(), base_name_); |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | std::string final_filename = current_filename; |
| 377 | if (use_temp_files_) { |
| 378 | CHECK(current_filename.size() > temp_suffix_.size()); |
| 379 | final_filename = current_filename.substr( |
| 380 | 0, current_filename.size() - temp_suffix_.size()); |
| 381 | } |
| 382 | |
| 383 | int result = rename(current_filename.c_str(), final_filename.c_str()); |
| 384 | |
| 385 | bool ran_out_of_space = false; |
| 386 | if (result != 0) { |
| 387 | if (errno == ENOSPC) { |
| 388 | ran_out_of_space = true; |
| 389 | } else { |
| 390 | PLOG(FATAL) << "Renaming " << current_filename << " to " << final_filename |
| 391 | << " failed"; |
| 392 | } |
| 393 | } else { |
| 394 | VLOG(1) << "Renamed " << current_filename << " -> " << final_filename; |
| 395 | } |
| 396 | return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk; |
| 397 | } |
| 398 | |
| 399 | WriteCode RenamableFileBackend::RenamableFileHandler::Close() { |
| 400 | if (!is_open()) { |
| 401 | return WriteCode::kOk; |
| 402 | } |
| 403 | if (FileHandler::Close() == WriteCode::kOutOfSpace) { |
| 404 | return WriteCode::kOutOfSpace; |
| 405 | } |
| 406 | if (owner_->RenameFileAfterClose(filename()) == WriteCode::kOutOfSpace) { |
| 407 | return WriteCode::kOutOfSpace; |
| 408 | } |
| 409 | return WriteCode::kOk; |
| 410 | } |
Austin Schuh | 3ebaf78 | 2023-04-07 16:03:28 -0700 | [diff] [blame^] | 411 | } // namespace aos::logger |