Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | // Allow dynamic symbol lookup in an in-memory Elf image. |
| 16 | // |
| 17 | |
| 18 | #include "absl/debugging/internal/elf_mem_image.h" |
| 19 | |
| 20 | #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h |
| 21 | |
| 22 | #include <string.h> |
| 23 | #include <cassert> |
| 24 | #include <cstddef> |
| 25 | #include "absl/base/internal/raw_logging.h" |
| 26 | |
// The Versym bit layout used below is taken from
// binutils/include/elf/common.h, which appears to be the only place it is
// written down:
//
//   /* This flag appears in a Versym structure.  It means that the symbol
//      is hidden, and is only visible with an explicit version number.
//      This is a GNU extension.  */
//   #define VERSYM_HIDDEN 0x8000
//
//   /* This is the mask for the rest of the Versym information.  */
//   #define VERSYM_VERSION 0x7fff
//
// Only the mask is defined here; it is applied to a Versym entry to obtain
// the version index before that index is passed to GetVerdef().

#define VERSYM_VERSION 0x7fff
| 39 | |
| 40 | namespace absl { |
Austin Schuh | b4691e9 | 2020-12-31 12:37:18 -0800 | [diff] [blame^] | 41 | ABSL_NAMESPACE_BEGIN |
Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame] | 42 | namespace debugging_internal { |
| 43 | |
| 44 | namespace { |
| 45 | |
| 46 | #if __WORDSIZE == 32 |
| 47 | const int kElfClass = ELFCLASS32; |
| 48 | int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); } |
| 49 | int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); } |
| 50 | #elif __WORDSIZE == 64 |
| 51 | const int kElfClass = ELFCLASS64; |
| 52 | int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); } |
| 53 | int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); } |
| 54 | #else |
| 55 | const int kElfClass = -1; |
| 56 | int ElfBind(const ElfW(Sym) *) { |
| 57 | ABSL_RAW_LOG(FATAL, "Unexpected word size"); |
| 58 | return 0; |
| 59 | } |
| 60 | int ElfType(const ElfW(Sym) *) { |
| 61 | ABSL_RAW_LOG(FATAL, "Unexpected word size"); |
| 62 | return 0; |
| 63 | } |
| 64 | #endif |
| 65 | |
| 66 | // Extract an element from one of the ELF tables, cast it to desired type. |
| 67 | // This is just a simple arithmetic and a glorified cast. |
| 68 | // Callers are responsible for bounds checking. |
| 69 | template <typename T> |
| 70 | const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset, |
| 71 | ElfW(Word) element_size, size_t index) { |
| 72 | return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) |
| 73 | + table_offset |
| 74 | + index * element_size); |
| 75 | } |
| 76 | |
| 77 | } // namespace |
| 78 | |
| 79 | // The value of this variable doesn't matter; it's used only for its |
| 80 | // unique address. |
| 81 | const int ElfMemImage::kInvalidBaseSentinel = 0; |
| 82 | |
| 83 | ElfMemImage::ElfMemImage(const void *base) { |
| 84 | ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer"); |
| 85 | Init(base); |
| 86 | } |
| 87 | |
| 88 | int ElfMemImage::GetNumSymbols() const { |
| 89 | if (!hash_) { |
| 90 | return 0; |
| 91 | } |
| 92 | // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash |
| 93 | return hash_[1]; |
| 94 | } |
| 95 | |
| 96 | const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { |
| 97 | ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); |
| 98 | return dynsym_ + index; |
| 99 | } |
| 100 | |
| 101 | const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { |
| 102 | ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); |
| 103 | return versym_ + index; |
| 104 | } |
| 105 | |
| 106 | const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { |
| 107 | ABSL_RAW_CHECK(index < ehdr_->e_phnum, "index out of range"); |
| 108 | return GetTableElement<ElfW(Phdr)>(ehdr_, |
| 109 | ehdr_->e_phoff, |
| 110 | ehdr_->e_phentsize, |
| 111 | index); |
| 112 | } |
| 113 | |
| 114 | const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { |
| 115 | ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); |
| 116 | return dynstr_ + offset; |
| 117 | } |
| 118 | |
| 119 | const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { |
| 120 | if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { |
| 121 | // Symbol corresponds to "special" (e.g. SHN_ABS) section. |
| 122 | return reinterpret_cast<const void *>(sym->st_value); |
| 123 | } |
| 124 | ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range"); |
| 125 | return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_); |
| 126 | } |
| 127 | |
| 128 | const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { |
| 129 | ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_, |
| 130 | "index out of range"); |
| 131 | const ElfW(Verdef) *version_definition = verdef_; |
| 132 | while (version_definition->vd_ndx < index && version_definition->vd_next) { |
| 133 | const char *const version_definition_as_char = |
| 134 | reinterpret_cast<const char *>(version_definition); |
| 135 | version_definition = |
| 136 | reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + |
| 137 | version_definition->vd_next); |
| 138 | } |
| 139 | return version_definition->vd_ndx == index ? version_definition : nullptr; |
| 140 | } |
| 141 | |
| 142 | const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( |
| 143 | const ElfW(Verdef) *verdef) const { |
| 144 | return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); |
| 145 | } |
| 146 | |
| 147 | const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { |
| 148 | ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); |
| 149 | return dynstr_ + offset; |
| 150 | } |
| 151 | |
| 152 | void ElfMemImage::Init(const void *base) { |
| 153 | ehdr_ = nullptr; |
| 154 | dynsym_ = nullptr; |
| 155 | dynstr_ = nullptr; |
| 156 | versym_ = nullptr; |
| 157 | verdef_ = nullptr; |
| 158 | hash_ = nullptr; |
| 159 | strsize_ = 0; |
| 160 | verdefnum_ = 0; |
| 161 | link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. |
| 162 | if (!base) { |
| 163 | return; |
| 164 | } |
| 165 | const char *const base_as_char = reinterpret_cast<const char *>(base); |
| 166 | if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || |
| 167 | base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { |
| 168 | assert(false); |
| 169 | return; |
| 170 | } |
| 171 | int elf_class = base_as_char[EI_CLASS]; |
| 172 | if (elf_class != kElfClass) { |
| 173 | assert(false); |
| 174 | return; |
| 175 | } |
| 176 | switch (base_as_char[EI_DATA]) { |
| 177 | case ELFDATA2LSB: { |
| 178 | if (__LITTLE_ENDIAN != __BYTE_ORDER) { |
| 179 | assert(false); |
| 180 | return; |
| 181 | } |
| 182 | break; |
| 183 | } |
| 184 | case ELFDATA2MSB: { |
| 185 | if (__BIG_ENDIAN != __BYTE_ORDER) { |
| 186 | assert(false); |
| 187 | return; |
| 188 | } |
| 189 | break; |
| 190 | } |
| 191 | default: { |
| 192 | assert(false); |
| 193 | return; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); |
| 198 | const ElfW(Phdr) *dynamic_program_header = nullptr; |
| 199 | for (int i = 0; i < ehdr_->e_phnum; ++i) { |
| 200 | const ElfW(Phdr) *const program_header = GetPhdr(i); |
| 201 | switch (program_header->p_type) { |
| 202 | case PT_LOAD: |
| 203 | if (!~link_base_) { |
| 204 | link_base_ = program_header->p_vaddr; |
| 205 | } |
| 206 | break; |
| 207 | case PT_DYNAMIC: |
| 208 | dynamic_program_header = program_header; |
| 209 | break; |
| 210 | } |
| 211 | } |
| 212 | if (!~link_base_ || !dynamic_program_header) { |
| 213 | assert(false); |
| 214 | // Mark this image as not present. Can not recur infinitely. |
| 215 | Init(nullptr); |
| 216 | return; |
| 217 | } |
| 218 | ptrdiff_t relocation = |
| 219 | base_as_char - reinterpret_cast<const char *>(link_base_); |
| 220 | ElfW(Dyn) *dynamic_entry = |
| 221 | reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + |
| 222 | relocation); |
| 223 | for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { |
| 224 | const ElfW(Xword) value = dynamic_entry->d_un.d_val + relocation; |
| 225 | switch (dynamic_entry->d_tag) { |
| 226 | case DT_HASH: |
| 227 | hash_ = reinterpret_cast<ElfW(Word) *>(value); |
| 228 | break; |
| 229 | case DT_SYMTAB: |
| 230 | dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); |
| 231 | break; |
| 232 | case DT_STRTAB: |
| 233 | dynstr_ = reinterpret_cast<const char *>(value); |
| 234 | break; |
| 235 | case DT_VERSYM: |
| 236 | versym_ = reinterpret_cast<ElfW(Versym) *>(value); |
| 237 | break; |
| 238 | case DT_VERDEF: |
| 239 | verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); |
| 240 | break; |
| 241 | case DT_VERDEFNUM: |
| 242 | verdefnum_ = dynamic_entry->d_un.d_val; |
| 243 | break; |
| 244 | case DT_STRSZ: |
| 245 | strsize_ = dynamic_entry->d_un.d_val; |
| 246 | break; |
| 247 | default: |
| 248 | // Unrecognized entries explicitly ignored. |
| 249 | break; |
| 250 | } |
| 251 | } |
| 252 | if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || |
| 253 | !verdef_ || !verdefnum_ || !strsize_) { |
| 254 | assert(false); // invalid VDSO |
| 255 | // Mark this image as not present. Can not recur infinitely. |
| 256 | Init(nullptr); |
| 257 | return; |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | bool ElfMemImage::LookupSymbol(const char *name, |
| 262 | const char *version, |
| 263 | int type, |
| 264 | SymbolInfo *info_out) const { |
| 265 | for (const SymbolInfo& info : *this) { |
| 266 | if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 && |
| 267 | ElfType(info.symbol) == type) { |
| 268 | if (info_out) { |
| 269 | *info_out = info; |
| 270 | } |
| 271 | return true; |
| 272 | } |
| 273 | } |
| 274 | return false; |
| 275 | } |
| 276 | |
| 277 | bool ElfMemImage::LookupSymbolByAddress(const void *address, |
| 278 | SymbolInfo *info_out) const { |
| 279 | for (const SymbolInfo& info : *this) { |
| 280 | const char *const symbol_start = |
| 281 | reinterpret_cast<const char *>(info.address); |
| 282 | const char *const symbol_end = symbol_start + info.symbol->st_size; |
| 283 | if (symbol_start <= address && address < symbol_end) { |
| 284 | if (info_out) { |
| 285 | // Client wants to know details for that symbol (the usual case). |
| 286 | if (ElfBind(info.symbol) == STB_GLOBAL) { |
| 287 | // Strong symbol; just return it. |
| 288 | *info_out = info; |
| 289 | return true; |
| 290 | } else { |
| 291 | // Weak or local. Record it, but keep looking for a strong one. |
| 292 | *info_out = info; |
| 293 | } |
| 294 | } else { |
| 295 | // Client only cares if there is an overlapping symbol. |
| 296 | return true; |
| 297 | } |
| 298 | } |
| 299 | } |
| 300 | return false; |
| 301 | } |
| 302 | |
| 303 | ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) |
| 304 | : index_(index), image_(image) { |
| 305 | } |
| 306 | |
| 307 | const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { |
| 308 | return &info_; |
| 309 | } |
| 310 | |
| 311 | const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { |
| 312 | return info_; |
| 313 | } |
| 314 | |
| 315 | bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { |
| 316 | return this->image_ == rhs.image_ && this->index_ == rhs.index_; |
| 317 | } |
| 318 | |
| 319 | bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { |
| 320 | return !(*this == rhs); |
| 321 | } |
| 322 | |
| 323 | ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { |
| 324 | this->Update(1); |
| 325 | return *this; |
| 326 | } |
| 327 | |
| 328 | ElfMemImage::SymbolIterator ElfMemImage::begin() const { |
| 329 | SymbolIterator it(this, 0); |
| 330 | it.Update(0); |
| 331 | return it; |
| 332 | } |
| 333 | |
| 334 | ElfMemImage::SymbolIterator ElfMemImage::end() const { |
| 335 | return SymbolIterator(this, GetNumSymbols()); |
| 336 | } |
| 337 | |
| 338 | void ElfMemImage::SymbolIterator::Update(int increment) { |
| 339 | const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); |
| 340 | ABSL_RAW_CHECK(image->IsPresent() || increment == 0, ""); |
| 341 | if (!image->IsPresent()) { |
| 342 | return; |
| 343 | } |
| 344 | index_ += increment; |
| 345 | if (index_ >= image->GetNumSymbols()) { |
| 346 | index_ = image->GetNumSymbols(); |
| 347 | return; |
| 348 | } |
| 349 | const ElfW(Sym) *symbol = image->GetDynsym(index_); |
| 350 | const ElfW(Versym) *version_symbol = image->GetVersym(index_); |
| 351 | ABSL_RAW_CHECK(symbol && version_symbol, ""); |
| 352 | const char *const symbol_name = image->GetDynstr(symbol->st_name); |
| 353 | const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; |
| 354 | const ElfW(Verdef) *version_definition = nullptr; |
| 355 | const char *version_name = ""; |
| 356 | if (symbol->st_shndx == SHN_UNDEF) { |
| 357 | // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and |
| 358 | // version_index could well be greater than verdefnum_, so calling |
| 359 | // GetVerdef(version_index) may trigger assertion. |
| 360 | } else { |
| 361 | version_definition = image->GetVerdef(version_index); |
| 362 | } |
| 363 | if (version_definition) { |
| 364 | // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, |
| 365 | // optional 2nd if the version has a parent. |
| 366 | ABSL_RAW_CHECK( |
| 367 | version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2, |
| 368 | "wrong number of entries"); |
| 369 | const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); |
| 370 | version_name = image->GetVerstr(version_aux->vda_name); |
| 371 | } |
| 372 | info_.name = symbol_name; |
| 373 | info_.version = version_name; |
| 374 | info_.address = image->GetSymAddr(symbol); |
| 375 | info_.symbol = symbol; |
| 376 | } |
| 377 | |
| 378 | } // namespace debugging_internal |
Austin Schuh | b4691e9 | 2020-12-31 12:37:18 -0800 | [diff] [blame^] | 379 | ABSL_NAMESPACE_END |
Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame] | 380 | } // namespace absl |
| 381 | |
| 382 | #endif // ABSL_HAVE_ELF_MEM_IMAGE |