Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 1 | #include "aos/events/logging/log_backend.h" |
| 2 | |
| 3 | #include <dirent.h> |
| 4 | |
| 5 | #include <filesystem> |
| 6 | |
| 7 | #include "absl/strings/str_cat.h" |
| 8 | #include "aos/util/file.h" |
| 9 | #include "glog/logging.h" |
| 10 | |
| 11 | DEFINE_bool(direct, false, |
| 12 | "If true, write using O_DIRECT and write 512 byte aligned blocks " |
| 13 | "whenever possible."); |
| 14 | |
| 15 | namespace aos::logger { |
| 16 | namespace { |
| 17 | constexpr const char *kTempExtension = ".tmp"; |
| 18 | } |
| 19 | |
| 20 | FileHandler::FileHandler(std::string filename) |
| 21 | : filename_(std::move(filename)), supports_odirect_(FLAGS_direct) {} |
| 22 | |
| 23 | FileHandler::~FileHandler() { Close(); } |
| 24 | |
| 25 | WriteCode FileHandler::OpenForWrite() { |
| 26 | iovec_.reserve(10); |
| 27 | if (!aos::util::MkdirPIfSpace(filename_, 0777)) { |
| 28 | return WriteCode::kOutOfSpace; |
| 29 | } else { |
| 30 | fd_ = open(filename_.c_str(), O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774); |
| 31 | if (fd_ == -1 && errno == ENOSPC) { |
| 32 | return WriteCode::kOutOfSpace; |
| 33 | } else { |
| 34 | PCHECK(fd_ != -1) << ": Failed to open " << filename_ << " for writing"; |
| 35 | VLOG(1) << "Opened " << filename_ << " for writing"; |
| 36 | } |
| 37 | |
| 38 | flags_ = fcntl(fd_, F_GETFL, 0); |
| 39 | PCHECK(flags_ >= 0) << ": Failed to get flags for " << this->filename(); |
| 40 | |
| 41 | EnableDirect(); |
| 42 | |
| 43 | CHECK(std::filesystem::exists(filename_)); |
| 44 | |
| 45 | return WriteCode::kOk; |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | void FileHandler::EnableDirect() { |
| 50 | if (supports_odirect_ && !ODirectEnabled()) { |
| 51 | const int new_flags = flags_ | O_DIRECT; |
| 52 | // Track if we failed to set O_DIRECT. Note: Austin hasn't seen this call |
| 53 | // fail. The write call tends to fail instead. |
| 54 | if (fcntl(fd_, F_SETFL, new_flags) == -1) { |
| 55 | PLOG(WARNING) << "Failed to set O_DIRECT on " << filename(); |
| 56 | supports_odirect_ = false; |
| 57 | } else { |
| 58 | VLOG(1) << "Enabled O_DIRECT on " << filename(); |
| 59 | flags_ = new_flags; |
| 60 | } |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | void FileHandler::DisableDirect() { |
| 65 | if (supports_odirect_ && ODirectEnabled()) { |
| 66 | flags_ = flags_ & (~O_DIRECT); |
| 67 | PCHECK(fcntl(fd_, F_SETFL, flags_) != -1) << ": Failed to disable O_DIRECT"; |
| 68 | VLOG(1) << "Disabled O_DIRECT on " << filename(); |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | WriteResult FileHandler::Write( |
| 73 | const absl::Span<const absl::Span<const uint8_t>> &queue) { |
| 74 | iovec_.clear(); |
| 75 | size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX); |
| 76 | iovec_.resize(iovec_size); |
| 77 | size_t counted_size = 0; |
| 78 | |
| 79 | // Ok, we now need to figure out if we were aligned, and if we were, how much |
| 80 | // of the data we are being asked to write is aligned. |
| 81 | // |
| 82 | // The file is aligned if it is a multiple of kSector in length. The data is |
| 83 | // aligned if it's memory is kSector aligned, and the length is a multiple of |
| 84 | // kSector in length. |
| 85 | bool aligned = (total_write_bytes_ % kSector) == 0; |
| 86 | size_t write_index = 0; |
| 87 | for (size_t i = 0; i < iovec_size; ++i) { |
| 88 | iovec_[write_index].iov_base = const_cast<uint8_t *>(queue[i].data()); |
| 89 | |
| 90 | // Make sure the address is aligned, or give up. This should be uncommon, |
| 91 | // but is always possible. |
| 92 | if ((reinterpret_cast<size_t>(iovec_[write_index].iov_base) & |
| 93 | (kSector - 1)) != 0) { |
| 94 | aligned = false; |
| 95 | } |
| 96 | |
| 97 | // Now, see if the length is a multiple of kSector. The goal is to figure |
| 98 | // out if/how much memory we can write out with O_DIRECT so that only the |
| 99 | // last little bit is done with non-direct IO to keep it fast. |
| 100 | iovec_[write_index].iov_len = queue[i].size(); |
| 101 | if ((iovec_[write_index].iov_len % kSector) != 0) { |
| 102 | VLOG(1) << "Unaligned length on " << filename(); |
| 103 | // If we've got over a sector of data to write, write it out with O_DIRECT |
| 104 | // and then continue writing the rest unaligned. |
| 105 | if (aligned && iovec_[write_index].iov_len > kSector) { |
| 106 | const size_t aligned_size = |
| 107 | iovec_[write_index].iov_len & (~(kSector - 1)); |
| 108 | VLOG(1) << "Was aligned, writing last chunk rounded from " |
| 109 | << queue[i].size() << " to " << aligned_size; |
| 110 | iovec_[write_index].iov_len = aligned_size; |
| 111 | |
| 112 | const auto code = |
| 113 | WriteV(iovec_.data(), i + 1, true, counted_size + aligned_size); |
| 114 | if (code == WriteCode::kOutOfSpace) { |
| 115 | return { |
| 116 | .code = code, |
| 117 | .messages_written = i, |
| 118 | }; |
| 119 | } |
| 120 | |
| 121 | // Now, everything before here has been written. Make an iovec out of |
| 122 | // the last bytes, and keep going. |
| 123 | // TODO (Alexei, Austin): is it safe to do here since it can be a |
| 124 | // situation when i >= iovec_size |
| 125 | iovec_size -= write_index; |
| 126 | iovec_.resize(iovec_size); |
| 127 | write_index = 0; |
| 128 | counted_size = 0; |
| 129 | |
| 130 | iovec_[write_index].iov_base = |
| 131 | const_cast<uint8_t *>(queue[i].data() + aligned_size); |
| 132 | iovec_[write_index].iov_len = queue[i].size() - aligned_size; |
| 133 | } |
| 134 | aligned = false; |
| 135 | } |
| 136 | VLOG(1) << "Writing " << iovec_[write_index].iov_len << " to " |
| 137 | << filename(); |
| 138 | counted_size += iovec_[write_index].iov_len; |
| 139 | ++write_index; |
| 140 | } |
| 141 | |
| 142 | // Either write the aligned data if it is all aligned, or write the rest |
| 143 | // unaligned if we wrote aligned up above. |
| 144 | const auto code = WriteV(iovec_.data(), iovec_.size(), aligned, counted_size); |
| 145 | return { |
| 146 | .code = code, |
| 147 | .messages_written = iovec_size, |
| 148 | }; |
| 149 | } |
| 150 | |
| 151 | WriteCode FileHandler::WriteV(struct iovec *iovec_data, size_t iovec_size, |
| 152 | bool aligned, size_t counted_size) { |
| 153 | // Configure the file descriptor to match the mode we should be in. This is |
| 154 | // safe to over-call since it only does the syscall if needed. |
| 155 | if (aligned) { |
| 156 | EnableDirect(); |
| 157 | } else { |
| 158 | DisableDirect(); |
| 159 | } |
| 160 | |
| 161 | const auto start = aos::monotonic_clock::now(); |
| 162 | const ssize_t written = writev(fd_, iovec_data, iovec_size); |
| 163 | |
| 164 | if (written > 0) { |
| 165 | // Flush asynchronously and force the data out of the cache. |
| 166 | sync_file_range(fd_, total_write_bytes_, written, SYNC_FILE_RANGE_WRITE); |
| 167 | if (last_synced_bytes_ != 0) { |
| 168 | // Per Linus' recommendation online on how to do fast file IO, do a |
| 169 | // blocking flush of the previous write chunk, and then tell the kernel to |
| 170 | // drop the pages from the cache. This makes sure we can't get too far |
| 171 | // ahead. |
| 172 | sync_file_range(fd_, last_synced_bytes_, |
| 173 | total_write_bytes_ - last_synced_bytes_, |
| 174 | SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | |
| 175 | SYNC_FILE_RANGE_WAIT_AFTER); |
| 176 | posix_fadvise(fd_, last_synced_bytes_, |
| 177 | total_write_bytes_ - last_synced_bytes_, |
| 178 | POSIX_FADV_DONTNEED); |
| 179 | |
| 180 | last_synced_bytes_ = total_write_bytes_; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | const auto end = aos::monotonic_clock::now(); |
| 185 | if (written == -1 && errno == ENOSPC) { |
| 186 | return WriteCode::kOutOfSpace; |
| 187 | } |
| 188 | PCHECK(written >= 0) << ": write failed, got " << written; |
| 189 | if (written < static_cast<ssize_t>(counted_size)) { |
| 190 | // Sometimes this happens instead of ENOSPC. On a real filesystem, this |
| 191 | // never seems to happen in any other case. If we ever want to log to a |
| 192 | // socket, this will happen more often. However, until we get there, we'll |
| 193 | // just assume it means we ran out of space. |
| 194 | return WriteCode::kOutOfSpace; |
| 195 | } |
| 196 | |
| 197 | total_write_bytes_ += written; |
| 198 | write_stats_.UpdateStats(end - start, written, iovec_size); |
| 199 | return WriteCode::kOk; |
| 200 | } |
| 201 | |
| 202 | WriteCode FileHandler::Close() { |
| 203 | if (!is_open()) { |
| 204 | return WriteCode::kOk; |
| 205 | } |
| 206 | bool ran_out_of_space = false; |
| 207 | if (close(fd_) == -1) { |
| 208 | if (errno == ENOSPC) { |
| 209 | ran_out_of_space = true; |
| 210 | } else { |
| 211 | PLOG(ERROR) << "Closing log file failed"; |
| 212 | } |
| 213 | } |
| 214 | fd_ = -1; |
| 215 | VLOG(1) << "Closed " << filename_; |
| 216 | return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk; |
| 217 | } |
| 218 | |
| 219 | FileBackend::FileBackend(std::string_view base_name) |
| 220 | : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {} |
| 221 | |
| 222 | std::unique_ptr<FileHandler> FileBackend::RequestFile(std::string_view id) { |
| 223 | const std::string filename = absl::StrCat(base_name_, separator_, id); |
| 224 | return std::make_unique<FileHandler>(filename); |
| 225 | } |
| 226 | |
| 227 | RenamableFileBackend::RenamableFileBackend(std::string_view base_name) |
| 228 | : base_name_(base_name), separator_(base_name_.back() == '/' ? "" : "_") {} |
| 229 | |
| 230 | std::unique_ptr<FileHandler> RenamableFileBackend::RequestFile( |
| 231 | std::string_view id) { |
| 232 | const std::string filename = |
| 233 | absl::StrCat(base_name_, separator_, id, temp_suffix_); |
| 234 | return std::make_unique<RenamableFileHandler>(this, filename); |
| 235 | } |
| 236 | |
| 237 | void RenamableFileBackend::EnableTempFiles() { |
| 238 | use_temp_files_ = true; |
| 239 | temp_suffix_ = kTempExtension; |
| 240 | } |
| 241 | |
| 242 | bool RenamableFileBackend::RenameLogBase(std::string_view new_base_name) { |
| 243 | if (new_base_name == base_name_) { |
| 244 | return true; |
| 245 | } |
| 246 | CHECK(old_base_name_.empty()) |
| 247 | << "Only one change of base_name is supported. Was: " << old_base_name_; |
| 248 | |
| 249 | std::string current_directory = base_name_; |
| 250 | std::string new_directory(new_base_name); |
| 251 | |
| 252 | auto current_path_split = current_directory.rfind("/"); |
| 253 | CHECK(current_path_split != std::string::npos) |
| 254 | << "Could not find / in the current directory path"; |
| 255 | auto new_path_split = new_directory.rfind("/"); |
| 256 | CHECK(new_path_split != std::string::npos) |
| 257 | << "Could not find / in the new directory path"; |
| 258 | |
| 259 | CHECK(new_base_name.substr(new_path_split) == |
| 260 | current_directory.substr(current_path_split)) |
| 261 | << "Rename of file base from " << current_directory << " to " |
| 262 | << new_directory << " is not supported."; |
| 263 | |
| 264 | current_directory.resize(current_path_split); |
| 265 | new_directory.resize(new_path_split); |
| 266 | DIR *dir = opendir(current_directory.c_str()); |
| 267 | if (dir) { |
| 268 | closedir(dir); |
| 269 | const int result = rename(current_directory.c_str(), new_directory.c_str()); |
| 270 | if (result != 0) { |
| 271 | PLOG(ERROR) << "Unable to rename " << current_directory << " to " |
| 272 | << new_directory; |
| 273 | return false; |
| 274 | } |
| 275 | } else { |
| 276 | // Handle if directory was already renamed. |
| 277 | dir = opendir(new_directory.c_str()); |
| 278 | if (!dir) { |
| 279 | LOG(ERROR) << "Old directory " << current_directory |
| 280 | << " missing and new directory " << new_directory |
| 281 | << " not present."; |
| 282 | return false; |
| 283 | } |
| 284 | closedir(dir); |
| 285 | } |
| 286 | old_base_name_ = base_name_; |
| 287 | base_name_ = std::string(new_base_name); |
| 288 | separator_ = base_name_.back() == '/' ? "" : "_"; |
| 289 | return true; |
| 290 | } |
| 291 | |
| 292 | WriteCode RenamableFileBackend::RenameFileAfterClose( |
| 293 | std::string_view filename) { |
| 294 | // Fast check that we can skip rename. |
| 295 | if (!use_temp_files_ && old_base_name_.empty()) { |
| 296 | return WriteCode::kOk; |
| 297 | } |
| 298 | |
| 299 | std::string current_filename(filename); |
| 300 | |
| 301 | // When changing the base name, we rename the log folder while there active |
| 302 | // buffer writers. Therefore, the name of that active buffer may still refer |
| 303 | // to the old file location rather than the new one. |
| 304 | if (!old_base_name_.empty()) { |
| 305 | auto offset = current_filename.find(old_base_name_); |
| 306 | if (offset != std::string::npos) { |
| 307 | current_filename.replace(offset, old_base_name_.length(), base_name_); |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | std::string final_filename = current_filename; |
| 312 | if (use_temp_files_) { |
| 313 | CHECK(current_filename.size() > temp_suffix_.size()); |
| 314 | final_filename = current_filename.substr( |
| 315 | 0, current_filename.size() - temp_suffix_.size()); |
| 316 | } |
| 317 | |
| 318 | int result = rename(current_filename.c_str(), final_filename.c_str()); |
| 319 | |
| 320 | bool ran_out_of_space = false; |
| 321 | if (result != 0) { |
| 322 | if (errno == ENOSPC) { |
| 323 | ran_out_of_space = true; |
| 324 | } else { |
| 325 | PLOG(FATAL) << "Renaming " << current_filename << " to " << final_filename |
| 326 | << " failed"; |
| 327 | } |
| 328 | } else { |
| 329 | VLOG(1) << "Renamed " << current_filename << " -> " << final_filename; |
| 330 | } |
| 331 | return ran_out_of_space ? WriteCode::kOutOfSpace : WriteCode::kOk; |
| 332 | } |
| 333 | |
| 334 | WriteCode RenamableFileBackend::RenamableFileHandler::Close() { |
| 335 | if (!is_open()) { |
| 336 | return WriteCode::kOk; |
| 337 | } |
| 338 | if (FileHandler::Close() == WriteCode::kOutOfSpace) { |
| 339 | return WriteCode::kOutOfSpace; |
| 340 | } |
| 341 | if (owner_->RenameFileAfterClose(filename()) == WriteCode::kOutOfSpace) { |
| 342 | return WriteCode::kOutOfSpace; |
| 343 | } |
| 344 | return WriteCode::kOk; |
| 345 | } |
| 346 | } // namespace aos::logger |